diff --git a/3.14/core-functionality/gene-fusions/index.html b/3.14/core-functionality/gene-fusions/index.html deleted file mode 100644 index e938fbac..00000000 --- a/3.14/core-functionality/gene-fusions/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Gene Fusion Detection | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

Gene Fusion Detection

Overview

Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed.

Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana.

The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:

Publication

Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. Landscape of gene fusions in epithelial cancers: seq and ye shall find. Genome Med 7, 129 (2015)

Approach

Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions.

For each originating transcript, we report the following:

  • originating intron or exon number
  • for each partner transcript fused with the originating transcript, we report:
    • HGVS coding notation
    • partner intron or exon number

Variant Types

Specifically we can identify gene fusions from the following structural variant types:

  • deletions (<DEL>)
  • tandem_duplications (<DUP:TANDEM>)
  • inversions (<INV>)
  • translocation breakpoints (AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[)

Criteria

The following criteria must be met for Nirvana to identify a gene fusion:

  1. Both transcripts must possess a coding region
  2. After accounting for genomic rearrangements, both transcripts must have the same orientation
  3. Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)
  4. Both transcripts must belong to different genes
  5. Both transcripts cannot have a coding region that already overlaps without the variant (i.e. in cases where two genes naturally overlap, we don't want to call a gene fusion)
UTR overlap

In the past, we also required that the coding regions from the two genes intersected. However, in oncology literature, there are many documented gene fusions where only the UTRs overlap. As a result, we adjusted our algorithm to allow for UTR overlaps as well.

ETV6/RUNX1 Example

ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). Patients with this translocation are associated with a good prognosis and excellent response to treatment.

VCF

Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
chr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND
chr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND
chr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND
chr21 36420865 . C [chr12:12026270[C . PASS SVTYPE=BND

Interpreting translocation breakends

REF | ALT | Meaning
s | t[p[ | piece extending to the right of p is joined after t
s | t]p] | reverse comp piece extending left of p is joined after t
s | ]p]t | piece extending to the left of p is joined before t
s | [p[t | reverse comp piece extending right of p is joined before t

Visualization

JSON Output

The annotation for the first variant in the VCF looks like this:

    {
"chromosome": "chr12",
"position": 12026270,
"refAllele": "C",
"altAlleles": [
"[chr21:36420865[C"
],
"filters": [
"PASS"
],
"cytogeneticBand": "12p13.2",
"clingen": [
{
"chromosome": "12",
"begin": 173786,
"end": 34835837,
"variantType": "copy_number_gain",
"id": "nsv995956",
"clinicalInterpretation": "pathogenic",
"phenotypes": [
"Decreased calvarial ossification",
"Delayed gross motor development",
"Feeding difficulties",
"Frontal bossing",
"Morphological abnormality of the central nervous system",
"Patchy alopecia"
],
"phenotypeIds": [
"HP:0002007",
"HP:0002011",
"HP:0002194",
"HP:0002232",
"HP:0005474",
"HP:0011968",
"MedGen:C0232466",
"MedGen:C1862862",
"MedGen:CN001816",
"MedGen:CN001820",
"MedGen:CN001989",
"MedGen:CN004852"
],
"observedGains": 1,
"validated": true
}
],
"variants": [
{
"vid": "12-12026270-C-[chr21:36420865[C",
"chromosome": "chr12",
"begin": 12026270,
"end": 12026270,
"isStructuralVariant": true,
"refAllele": "C",
"altAllele": "[chr21:36420865[C",
"variantType": "translocation_breakend",
"transcripts": [
{
"transcript": "ENST00000396373.4",
"source": "Ensembl",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "ENSG00000139083",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusion": {
"intron": 5,
"fusions": [
{
"hgvsc": "RUNX1{ENST00000437180.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 2
},
{
"hgvsc": "RUNX1{ENST00000300305.3}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 1
},
{
"hgvsc": "RUNX1{ENST00000482318.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 2
},
{
"hgvsc": "RUNX1{ENST00000486278.2}:c.?_156195_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 2
},
{
"hgvsc": "RUNX1{ENST00000455571.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 2
},
{
"hgvsc": "RUNX1{ENST00000475045.2}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 11
},
{
"hgvsc": "RUNX1{ENST00000416754.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 2
}
]
},
"isCanonical": true,
"proteinId": "ENSP00000379658.3"
},
{
"transcript": "NM_001987.4",
"source": "RefSeq",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "2120",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusion": {
"intron": 5,
"fusions": [
{
"hgvsc": "RUNX1{NM_001754.4}:c.1_58+274_ETV6{NM_001987.4}:c.1009+3367_1359",
"intron": 2
}
]
},
"isCanonical": true,
"proteinId": "NP_001978.1"
}
]
}
]
}

Consequences

When a gene fusion is identified, we add the following Sequence Ontology consequence:

              "consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],

Introns & Exons

In this section we describe all the pairwise gene fusions that obey the criteria outlined above. In the case of ENST00000396373.4, there are 7 other Ensembl transcripts that would produce a gene fusion. For NM_001987.4, there is only one transcript (NM_001754.4) that would produce a gene fusion.

In each case, Nirvana outputs which intron or exon contained the breakpoint in both of the transcripts that form the gene fusion.

HGVS coding notation

Finally, Nirvana also describes the gene fusion using HGVS c. notation:

                "fusions": [
{
"hgvsc": "RUNX1{NM_001754.4}:c.1_58+274_ETV6{NM_001987.4}:c.1009+3367_1359",
"intron": 2
}

This means that the gene fusion uses CDS positions 1-58 from NM_001754.4 (RUNX1) and CDS positions 1009-1359 from NM_001987.4 (ETV6). 1009+3367 indicates that the fusion occurred 3367 bp within intron 2.

- - - - \ No newline at end of file diff --git a/3.14/core-functionality/variant-ids/index.html b/3.14/core-functionality/variant-ids/index.html deleted file mode 100644 index 2fa759a6..00000000 --- a/3.14/core-functionality/variant-ids/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Variant IDs | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

Variant IDs

Overview

Many downstream tools use a variant identifier to store annotation results. We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute.

The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. Our VID scheme attempts to fill that gap.

Conventions
  • all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)
  • for a reference variant (i.e. no alt allele), replace the period (.) with the reference base
  • padding bases are used, neither the reference nor alternate allele can be empty
  • some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base

Small Variants

VCF Examples

chr1    66507   .   T   A   184.45  PASS    .
chr1 66521 . T TATATA 144.53 PASS .
chr1 66572 . GTA G,GTACTATATATTATA 45.45 PASS .

Format

chromosome-position-reference allele-alternate allele

VID Examples

  • 1-66507-T-A
  • 1-66521-T-TATATA
  • 1-66572-GTA-G
  • 1-66572-G-GTACTATATATTA

Translocation Breakends

VCF Example

chr1    2617277 .   A   AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[  .   PASS    SVTYPE=BND

Format

chromosome-position-reference allele-alternate allele

VID Example

  • 1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[

All Other Structural Variants

VCF Examples

chr1    1000    .   G   <ROH>   .   PASS    END=3001000;SVTYPE=ROH
chr1 1350082 . G <DEL> . PASS END=1351320;SVTYPE=DEL
chr1 1477854 . C <DUP:TANDEM> . PASS END=1477984;SVTYPE=DUP
chr1 1477968 . T <INS> . PASS END=1477968;SVTYPE=INS
chr1 1715898 . N <DUP> . PASS SVTYPE=CNV;END=1750149
chr1 2650426 . N <DEL> . PASS SVTYPE=CNV;END=2653074
chr2 321682 . T <INV> . PASS SVTYPE=INV;END=421681
chr20 2633403 . G <STR2> . PASS END=2633421

Format

chromosome-position-end position-reference allele-alternate allele-SVTYPE

VID Examples

  • 1-1000-3001000-G-<ROH>-ROH
  • 1-1350082-1351320-G-<DEL>-DEL
  • 1-1477854-1477984-C-<DUP:TANDEM>-DUP
  • 1-1477968-1477968-T-<INS>-INS
  • 1-1715898-1750149-A-<DUP>-CNV (replace the N with A)
  • 1-2650426-2653074-N-<DEL>-CNV (keep the N)
  • 2-321682-421681-T-<INV>-INV
  • 20-2633403-2633421-G-<STR2>-STR
- - - - \ No newline at end of file diff --git a/3.14/data-sources/1000Genomes-snv-json/index.html b/3.14/data-sources/1000Genomes-snv-json/index.html deleted file mode 100644 index 4646acc3..00000000 --- a/3.14/data-sources/1000Genomes-snv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-snv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

1000Genomes-snv-json

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.
- - - - \ No newline at end of file diff --git a/3.14/data-sources/1000Genomes-sv-json/index.html b/3.14/data-sources/1000Genomes-sv-json/index.html deleted file mode 100644 index 72ff783b..00000000 --- a/3.14/data-sources/1000Genomes-sv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-sv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

1000Genomes-sv-json

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
Field | Type | Notes
chromosome | string |
begin | integer |
end | integer |
variantType | string |
id | string |
allAn | integer | allele number for all populations. Non-zero integer.
allAc | integer | allele count for all populations. Integer.
allAf | floating point | allele frequency for all populations. Range: 0 - 1.0
afrAf | floating point | allele frequency for the African super population. Range: 0 - 1.0
amrAf | floating point | allele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAf | floating point | allele frequency for the European super population. Range: 0 - 1.0
easAf | floating point | allele frequency for the East Asian super population. Range: 0 - 1.0
sasAf | floating point | allele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlap | floating point | range: 0 - 1.
- - - - \ No newline at end of file diff --git a/3.14/data-sources/1000Genomes/index.html b/3.14/data-sources/1000Genomes/index.html deleted file mode 100644 index 3552fa2f..00000000 --- a/3.14/data-sources/1000Genomes/index.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - -1000 Genomes | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

1000 Genomes

Overview

The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases.

Publication

Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. Nature 526, 75–81 (2015). https://doi.org/10.1038/nature15394

Populations

Small Variants

VCF File Parsing

The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. Our team converted the original data to VCF entries with allele counts and allele numbers like the following.

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633

The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored).

We parse the VCF file and extract the following fields from INFO:

  • AA
  • AC
  • AN
  • EAS_AN
  • AMR_AN
  • AFR_AN
  • EUR_AN
  • SAS_AN
  • EAS_AC
  • AMR_AC
  • AFR_AC
  • EUR_AC
  • SAS_AC

Conflict Resolution

We have observed conflicting allele frequency information in the source. Take the following example:

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;
1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;

That is, the variant 1-20505705-C-CTG has conflicting entries. To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX.

Chromosome | # of alleles | # of conflicting alleles | percentage
chrX | 834800 | 2733 | 0.33%
Total | 21413098 | 2743 | 0.013%

Currently, we remove the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep the allele frequencies of all other alleles in the VCF line.

Potential Alternate Solutions

  • Remove all alleles that are contained in the VCF lines which have a conflicting allele. (Recommended by Holly Zheng-Bradley of the 1000 Genomes group, 7/29/2015)
  • Recalculate the allele frequency for the conflicting allele.
  • Pick the allele frequency that has the highest data support.

Download URL

GRCh37 -GRCh38

JSON Output

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.

Structural Variants

VCF File Parsing

The VCF files contain entries like the following:

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A <CN0>,<CN2>,<CN3>,<CN4> 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4

Please note that CNVs are allele-specific. For example, HG00096 is effectively copy number 4, which would be a net gain on chr22.

1000 Genomes contains 5 types of structural variants:

  • CNV
  • DEL
  • DUP
  • INS
  • INV

Since the 1000 Genomes data is provided in VCF format, we assume that the coordinates follow the VCF convention, i.e., there is a padding base for symbolic alleles. So all intervals can be interpreted as [BEGIN+1, END]. -Similarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below.

Insertion issues

  • END = BEGIN for 6/165
  • END = BEGIN+2 for 93/165
  • END = BEGIN+3 for 11/165
  • END = BEGIN+4 for 11/165
  • END – BEGIN ranges from 5 to 1156 for the others.

Converting VCF svTypes to SO sequence alterations

The svType will be captured in our JSON file under the sequenceAlteration key. Here's the translation we'll use according to svType in 1000 Genomes.

svType | Alternative Alleles contain <CN*> | sequenceAlteration
ALU | FALSE | mobile_element_insertion
DUP | TRUE | copy_number_gain
CNV | TRUE | copy_number_gain (observed_gains > 0 and observed_losses = 0); copy_number_loss (observed_gains = 0 and observed_losses > 0); copy_number_variation (otherwise)
DEL | TRUE | copy_number_loss
LINE1 | FALSE | mobile_element_insertion
SVA | FALSE | mobile_element_insertion
INV | FALSE | inversion
INS | FALSE | insertion

Exceptions

We discard structural variants without END

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
21 9495848 esv3646347 A <INS:ME:LINE1> 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0

CNVs in chrY

  • No other types of structural variants exist in chrY
  • Since copy number is provided in genotype field, we directly parse the copy number from "CN" field.
  • For most CNVs in chrY, the reference copy number is 1, but the reference copy number for CNVs in segmental duplication sites is 2 (<CN2> in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00101 HG00103 HG00105 HG00107 HG00108
Y 2888555 CNV_Y_2888555_3014661 T <CN2> 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394
Y 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C <CN1>,<CN3> 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99

JSON Output

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
Field | Type | Notes
chromosome | string |
begin | integer |
end | integer |
variantType | string |
id | string |
allAn | integer | allele number for all populations. Non-zero integer.
allAc | integer | allele count for all populations. Integer.
allAf | floating point | allele frequency for all populations. Range: 0 - 1.0
afrAf | floating point | allele frequency for the African super population. Range: 0 - 1.0
amrAf | floating point | allele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAf | floating point | allele frequency for the European super population. Range: 0 - 1.0
easAf | floating point | allele frequency for the East Asian super population. Range: 0 - 1.0
sasAf | floating point | allele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlap | floating point | range: 0 - 1.
- - - - \ No newline at end of file diff --git a/3.14/data-sources/clinvar-json/index.html b/3.14/data-sources/clinvar-json/index.html deleted file mode 100644 index 7fc38fb8..00000000 --- a/3.14/data-sources/clinvar-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clinvar-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

clinvar-json

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity
- - - - \ No newline at end of file diff --git a/3.14/data-sources/clinvar/index.html b/3.14/data-sources/clinvar/index.html deleted file mode 100644 index 74e0fadf..00000000 --- a/3.14/data-sources/clinvar/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -ClinVar | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

ClinVar

Overview

ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation.

Publication

Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, Nucleic Acids Research, 46, Issue D1, 4 January 2018, Pages D1062–D1067, https://doi.org/10.1093/nar/gkx1153

RCV File

Example

Here's a full RCV entry.

Parsing

In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output.

ID

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinVarAccession Acc="RCV000000001" Version="2">
</ClinVarSet>

The Acc and Version fields are merged to form the ID (RCV000000001.2)

LastUpdatedDate

<ClinVarSet>
<ReferenceClinVarAssertion DateCreated="2012-08-13" DateLastUpdated="2016-02-17" ID="57604" >
</ClinVarSet>

Significance

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

ReviewStatus

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

Phenotypes

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="62">
<Trait Type="Disease">
<Name>
<ElementValue Type="Preferred">Joubert syndrome 9</ElementValue>
</Name>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

We only use the field with Type="Preferred". Multiple phenotypes may be reported.

Location and Variant Id

<ReferenceClinVarAssertion>
<GenotypeSet Type="CompoundHeterozygote" ID="424709">
<MeasureSet Type="Variant" ID="81">
<Measure Type="single nucleotide variant" ID="15120">
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38"
AssemblyStatus="current" Chr="10" Accession="NC_000010.11" start="89222510"
stop="89222510" display_start="89222510" display_stop="89222510" variantLength="1"
positionVCF="89222510" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25"
AssemblyStatus="previous" Chr="10" Accession="NC_000010.10" start="90982267"
stop="90982267" display_start="90982267" display_stop="90982267" variantLength="1"
positionVCF="90982267" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
</Measure>
</MeasureSet>
</GenotypeSet>
</ReferenceClinVarAssertion>
  • The variant position is extracted from the fields for their respective assemblies.
  • Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant.
  • For older records, since the "start" and "stop" fields are not always available, we use the "display_start" and "display_stop" fields.
  • If a required allele is not available, we extract it from the reference sequence.
  • Only variants having a dbSNP id are extracted.
  • Note that a ClinVar accession may have multiple variants associated with it (possibly in different locations)
  • VariantId is extracted from the MeasureSet attributes.

MedGen, OMIM, Orphanet IDs

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="175">
<Trait ID="3036" Type="Disease">
<XRef ID="C0086651" DB="MedGen"/>
<XRef ID="309297" DB="Orphanet"/>
<XRef ID="582" DB="Orphanet"/>
<XRef Type="MIM" ID="253000" DB="OMIM"/>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

AlleleOrigins

<ClinVarAssertion>
<Origin>germline</Origin>
</ClinVarAssertion>

We extract Allele Origins only from Submissions (SCV) entries.

PubMedIds

<ClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<Citation Type="general">
<ID Source="PubMed">12114475</ID>
</Citation>
</ClinicalSignificance>
<AttributeSet>
<Attribute Type="AssertionMethod">LMM Criteria</Attribute>
<Citation>
<ID Source="PubMed">24033266</ID>
</Citation>
</AttributeSet>
<ObservedIn>
<ObservedData ID="9727445">
<Citation Type="general">
<ID Source="PubMed">9113933</ID>
</Citation>
</ObservedData>
</ObservedIn>
<Citation Type="general">
<ID Source="PubMed">23757202</ID>
</Citation>
</ClinVarAssertion>

We extract PubMed IDs only from Submissions (SCV) entries.

Parsing Significance

Extracting significance(s) may involve parsing multiple fields. Take the following snippets into consideration.

<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2016-10-13">
<ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus>
<Description>Pathogenic/Likely pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2012-06-07">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Conflicting interpretations of pathogenicity</Description>
<Explanation DataSource="ClinVar" Type="public">Pathogenic(1);Uncertain significance(1)</Explanation>
</ClinicalSignificance>

Given the evidence, we converted the significance field into an array of strings which may be parsed out of the Description or Explanation fields.

Varying Delimiters

The delimiters in each field may vary. Currently, the delimiters for Description are , and /. The delimiters for Explanation are ; and /.

VCV File

Example

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<ClinVarVariationRelease xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd" ReleaseDate="2019-12-31">
<VariationArchive VariationID="431749" VariationName="GRCh37/hg19 1p36.31(chr1:6051187-6158763)" VariationType="copy number gain" DateCreated="2017-08-12" DateLastUpdated="2019-09-10" Accession="VCV000431749" Version="1" RecordType="included" NumberOfSubmissions="0" NumberOfSubmitters="0">
<RecordStatus>current</RecordStatus>
<Species>Homo sapiens</Species>
<IncludedRecord>
<SimpleAllele AlleleID="425239" VariationID="431749">
<GeneList>
<Gene Symbol="KCNAB2" FullName="potassium voltage-gated channel subfamily A regulatory beta subunit 2" GeneID="8514" HGNC_ID="HGNC:6229" Source="calculated" RelationshipType="genes overlapped by variant">
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="1" Accession="NC_000001.11" start="5992639" stop="6101186" display_start="5992639" display_stop="6101186" Strand="+"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="6052357" stop="6161252" display_start="6052357" display_stop="6161252" Strand="+"/>
</Location>
<OMIM>601142</OMIM>
</Gene>
<Gene Symbol="NPHP4" FullName="nephrocystin 4" GeneID="261734" HGNC_ID="HGNC:19104" Source="calculated" RelationshipType="genes overlapped by variant">
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="1" Accession="NC_000001.11" start="5862810" stop="5992425" display_start="5862810" display_stop="5992425" Strand="-"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="5922869" stop="6052532" display_start="5922869" display_stop="6052532" Strand="-"/>
</Location>
<OMIM>607215</OMIM>
</Gene>
</GeneList>
<Name>GRCh37/hg19 1p36.31(chr1:6051187-6158763)</Name>
<VariantType>copy number gain</VariantType>
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" forDisplay="true" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="6051187" stop="6158763" display_start="6051187" display_stop="6158763"/> </Location>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
<XRefList>
<XRef Type="Interpreted" ID="431733" DB="ClinVar"/>
</XRefList>
</SimpleAllele>
<ReviewStatus>no interpretation for the single variant</ReviewStatus>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
<SubmittedInterpretationList>
<SCV Title="SUB1895145" Accession="SCV000296057" Version="1"/>
</SubmittedInterpretationList>
<InterpretedVariationList>
<InterpretedVariation VariationID="431733" Accession="VCV000431733" Version="1"/>
</InterpretedVariationList>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

Parsing

In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output.

id

<VariationArchive VariationID="431749" VariationName="GRCh37/hg19 1p36.31(chr1:6051187-6158763)" VariationType="copy number gain" DateCreated="2017-08-12" DateLastUpdated="2019-09-10" Accession="VCV000431749" Version="1" RecordType="included" NumberOfSubmissions="0" NumberOfSubmitters="0">

The Accession and Version fields are merged to form the ID (VCV000431749.1)

significance

<ClinVarVariationRelease>
<VariationArchive>
<IncludedRecord>
<SimpleAllele>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
</SimpleAllele>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

May have multiple significances listed.

reviewStatus

<ClinVarVariationRelease>
<VariationArchive>
<IncludedRecord>
<ReviewStatus>no interpretation for the single variant</ReviewStatus>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

Known Issues

Known Issues
  • The XML file contains ~1k more entries (out of 162K) than the VCF file
  • The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF
  • The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H", etc.) as their alternate allele

Download URL

ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz

JSON Output

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity
- - - - \ No newline at end of file diff --git a/3.14/data-sources/dbsnp-json/index.html b/3.14/data-sources/dbsnp-json/index.html deleted file mode 100644 index 520acd1d..00000000 --- a/3.14/data-sources/dbsnp-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbsnp-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

dbsnp-json

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.14/data-sources/dbsnp/index.html b/3.14/data-sources/dbsnp/index.html deleted file mode 100644 index 9b35b3db..00000000 --- a/3.14/data-sources/dbsnp/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbSNP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

dbSNP

Overview

dbSNP contains human single nucleotide variations, microsatellites, and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations.

Publication

Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP—Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. Genome Res., 9, 677–679.

VCF File

Example

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 10177 rs367896724 A AC . . RS=367896724;RSPOS=10177;dbSNPBuildID=138; \
SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \
VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \
TOPMED=0.76728147298674821,0.23271852701325178

Parsing

From the VCF file, we're mainly interested in the following:

  • rsID from the ID field
  • CAF from the INFO field

Global allele extraction

The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values).

Tie Breaking: Global Major Allele

If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele.

Tie Breaking: Global Minor Allele

If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily.

Equal Allele Frequency Example (2 alleles)

chr1    100 A   C   CAF=0.5,0.5

We will select A to be the global major allele and C to be the global minor allele.

Equal Allele Frequency Example (3 alleles)

chr1    100 A   C,T CAF=0.33,0.33,0.33

We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele.

Equal Allele Frequency in Alternate Alleles

chr1    100 A   C,T CAF=0.2,0.4,0.4

C and T will be arbitrarily assigned as the global major and global minor alleles (one to each).

Equal Allele Frequency Between Reference & Alternate Allele

chr1    100 A   C,T CAF=0.2,0.2,0.6

We will select T to be the global major allele and C to be the global minor allele.

Known Issues

Known Issues

If there are multiple entries with different CAF values for the same allele, we use the first CAF value.

Download URL

https://ftp.ncbi.nih.gov/snp/organisms/

JSON Output

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.14/data-sources/gnomad-lof-json/index.html b/3.14/data-sources/gnomad-lof-json/index.html deleted file mode 100644 index add572d3..00000000 --- a/3.14/data-sources/gnomad-lof-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-lof-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

gnomad-lof-json

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)
- - - - \ No newline at end of file diff --git a/3.14/data-sources/gnomad-small-variants-json/index.html b/3.14/data-sources/gnomad-small-variants-json/index.html deleted file mode 100644 index 6bbc01e6..00000000 --- a/3.14/data-sources/gnomad-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

gnomad-small-variants-json

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.
- - - - \ No newline at end of file diff --git a/3.14/data-sources/gnomad/index.html b/3.14/data-sources/gnomad/index.html deleted file mode 100644 index 983e60ad..00000000 --- a/3.14/data-sources/gnomad/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomAD | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

gnomAD

Overview

The Genome Aggregation Database (gnomAD) is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community.

Small Variants

VCF extraction

We currently extract the following info fields from gnomAD genome and exome VCF files:

##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate allele count for samples">
##INFO=<ID=AN,Number=A,Type=Integer,Description="Total number of alleles in samples">
##INFO=<ID=nhomalt,Number=A,Type=Integer,Description="Count of homozygous individuals in samples">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Depth of informative coverage for each sample; reads with MQ=255 or with bad mates are filtered">
##INFO=<ID=lcr,Number=0,Type=Flag,Description="Variant falls within a low complexity region">
##INFO=<ID=AC_afr,Number=A,Type=Integer,Description="Alternate allele count for samples of African-American ancestry">
##INFO=<ID=AN_afr,Number=A,Type=Integer,Description="Total number of alleles in samples of African-American ancestry">
##INFO=<ID=AF_afr,Number=A,Type=Float,Description="Alternate allele frequency in samples of African-American ancestry">
##INFO=<ID=nhomalt_afr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of African-American ancestry">
##INFO=<ID=AC_amr,Number=A,Type=Integer,Description="Alternate allele count for samples of Latino ancestry">
##INFO=<ID=AN_amr,Number=A,Type=Integer,Description="Total number of alleles in samples of Latino ancestry">
##INFO=<ID=nhomalt_amr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Latino ancestry">
##INFO=<ID=AC_eas,Number=A,Type=Integer,Description="Alternate allele count for samples of East Asian ancestry">
##INFO=<ID=AN_eas,Number=A,Type=Integer,Description="Total number of alleles in samples of East Asian ancestry">
##INFO=<ID=nhomalt_eas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of East Asian ancestry">
##INFO=<ID=AC_female,Number=A,Type=Integer,Description="Alternate allele count for female samples">
##INFO=<ID=AN_female,Number=A,Type=Integer,Description="Total number of alleles in female samples">
##INFO=<ID=nhomalt_female,Number=A,Type=Integer,Description="Count of homozygous individuals in female samples">
##INFO=<ID=AC_nfe,Number=A,Type=Integer,Description="Alternate allele count for samples of non-Finnish European ancestry">
##INFO=<ID=AN_nfe,Number=A,Type=Integer,Description="Total number of alleles in samples of non-Finnish European ancestry">
##INFO=<ID=nhomalt_nfe,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of non-Finnish European ancestry">
##INFO=<ID=AC_fin,Number=A,Type=Integer,Description="Alternate allele count for samples of Finnish ancestry">
##INFO=<ID=AN_fin,Number=A,Type=Integer,Description="Total number of alleles in samples of Finnish ancestry">
##INFO=<ID=nhomalt_fin,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Finnish ancestry">
##INFO=<ID=AC_asj,Number=A,Type=Integer,Description="Alternate allele count for samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AN_asj,Number=A,Type=Integer,Description="Total number of alleles in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=nhomalt_asj,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AC_oth,Number=A,Type=Integer,Description="Alternate allele count for samples of uncertain ancestry">
##INFO=<ID=AN_oth,Number=A,Type=Integer,Description="Total number of alleles in samples of uncertain ancestry">
##INFO=<ID=nhomalt_oth,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of uncertain ancestry">
##INFO=<ID=AC_male,Number=A,Type=Integer,Description="Alternate allele count for male samples">
##INFO=<ID=AN_male,Number=A,Type=Integer,Description="Total number of alleles in male samples">
##INFO=<ID=nhomalt_male,Number=A,Type=Integer,Description="Count of homozygous individuals in male samples">
##INFO=<ID=controls_AC,Number=A,Type=Integer,Description="Alternate allele count for samples in the controls subset">
##INFO=<ID=controls_AN,Number=A,Type=Integer,Description="Total number of alleles in samples in the controls subset">

We also extract the following extra fields from gnomAD exome VCF file:

##INFO=<ID=AC_sas,Number=A,Type=Integer,Description="Alternate allele count for samples of South Asian ancestry">
##INFO=<ID=AN_sas,Number=A,Type=Integer,Description="Total number of alleles in samples of South Asian ancestry">
##INFO=<ID=nhomalt_sas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of South Asian ancestry">

Computation

Using these, we compute the following:

  • Coverage
  • Allele count, Homozygous count, allele number and allele frequencies for:
    • Global population
    • African/African Americans
    • Admixed Americans
    • Ashkenazi Jews
    • East Asians
    • Finnish
    • Non-Finnish Europeans
    • South Asian
    • Others (population not assigned)
    • Male
    • Female
    • Controls
Note
  • Coverage = DP / AN. Frequencies are computed using AC/AN for each population.
  • Please note that currently there is no genome sequencing data of South Asian (SAS) population available in gnomAD.
  • Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.

Merging genomes and exomes

When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets.

info
  • For GRCh37, Nirvana currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output.
  • For GRCh38, Nirvana currently uses gnomAD version 3.0 which doesn't contain the exomes data. Therefore, only genomes data are presented in the output.

Filters

The following strategy will be used when there's a conflict in filter status:

Genomes PASSGenomes Filtered
Exomes PASSPASSOnly use exome data
Exomes FilteredOnly use genome dataFiltered

VCF download instructions

https://gnomad.broadinstitute.org/downloads

JSON output

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.

LoF Gene Metrics

Tab delimited file example

gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_z mis_z lof_z oe_lof_upper_rank oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfe p_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof exac_exp_lof exac_oe_lof brain_expression chromosome start_position end_position
MED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643

JSON key to TSV column mapping

JSON keyTSV columnDescription
pLipLIprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullpNullprobability of being completely tolerant of loss of function variation (observed = expected)
pRecpRecprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZsyn_zcorrected synonymous Z score
misZmis_zcorrected missense Z score
loeufoe_lof_upperloss of function observed/expected upper bound fraction (LOEUF)

Gene symbol update

The input file provides Ensembl gene ids for each entry. We observed that they were unique while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene Ids are more stable, and Nirvana transcript cache data contains Ensembl gene ids, we use these ids to extract the gene symbols from the transcript cache. For example, if ENSG0001 has gene symbol GENE1 in the input but Nirvana cache says ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry.

Conflict resolution

gnomAD uses Ensembl GeneID as unique identifiers in the source file but Nirvana uses HGNC gene symbols. Multiple Ensembl GeneIDs can map to the same HGNC symbol and therefore may result in conflicts.

MDGA2   ENST00000426342 306 4.0043e+02  7.6419e-01  2.1096e-05  4724    78  1.6525e+02  4.7202e-01  1923    125 1.3737e+02  9.0993e-01  7.1973e-06  1413    4   2.0926e-06  453 3.8316e+01  9.9922e-01  8.6490e-12  7.8128e-04  1.0440e-01  7.8600e-01  1.0560e+00  6.9500e-01  8.4000e-01  5.0000e-02  2.3900e-01      8.2988e-01  1.6769e+00  5.1372e+00  1529    0   0   7   2.8103e-05  4.0317e-06  124784  7   0   124791  2.8047e-05  9.8167e-05  0.0000e+00  2.8962e-05  0.0000e+00  0.0000e+00  0.0000e+00  3.5391e-05  1.6672e-04  3.2680e-05  0.0000e+00  2.8962e-05  0.0000e+00  0.0000e+00  0.0000e+00  3.5308e-05  1.6492e-04  3.2678e-05  protein_coding  ENSG00000139915 2   2181    13  protein_coding  835332  9.9322e-01  3   2.7833e+01  1.0779e-01  NA  14  47308826    48144157
MDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 47311134 48143999

In such cases, Nirvana chooses the entry with the smallest "LOEUF" value. The reason for choosing this value can be highlighted by the following table:

LOEUF decileHaplo-insufficientAutosomal DominantAutosomal RecessiveOlfactory Genes
0-10%104140360
10-20%47128721
20-30%17861120
30-40%8801734
40-50%7652068
50-60%4542076
60-70%04615418
70-80%24912049
80-90%0345896
90-100%02640174
Note

List of genes with conflicting entries

MDGA2:
{"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}
{"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}
CRYBG3:
{"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}
{"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}
CHTF8:
{"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}
{"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}
SEPT1:
{"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}
{"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}
ARL14EPL:
{"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}
{"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}
UGT2A1:
{"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}
{"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}
LTB4R2:
{"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}
{"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}
CDRT1:
{"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}
{"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}
MUC3A:
{"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}
{"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}
COG8:
{"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}
{"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}
AC006486.1:
{"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}
{"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}
AL645922.1:
{"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}
{"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}
NBPF20:
{"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}
{"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}
PRAMEF11:
{"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}
{"synZ":-3.33e0,"misZ":-2.59e0}
FAM231D:
{"synZ":-1.98e0,"misZ":-1.44e0}
{"synZ":1.07e0,"misZ":3.13e-1}

Conflict resolution

  • Pick the entry with the lowest LOEUF score
  • If the same, pick the lowest pLI
  • Otherwise pick the entry with the max absolute value of synZ + misZ

Download URL

https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz

JSON output

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)
- - - - \ No newline at end of file diff --git a/3.14/data-sources/mito-heteroplasmy/index.html b/3.14/data-sources/mito-heteroplasmy/index.html deleted file mode 100644 index 795adbc3..00000000 --- a/3.14/data-sources/mito-heteroplasmy/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Mitochondrial Heteroplasmy | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

Mitochondrial Heteroplasmy

Overview

Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline.

JSON File

Example

{
"T:C":{
"ad":[
1,
1,
1,
1,
1,
1
],
"allele_type":"alt",
"vrf":[
0.002369668246445498,
0.0024937655860349127,
0.0016129032258064516,
0.0025188916876574307,
0.0022935779816513763,
0.002008032128514056
],
"vrf_stats":{
"kurtosis":38.889891511122556,
"max":0.0025188916876574307,
"mean":5.4052190471990743e-05,
"min":0.0,
"nobs":246,
"skewness":6.346664692283075,
"stdev":0.0003461416264750575,
"variance":1.1981402557879823e-07
}
}
}

Parsing

From the JSON file, we're mainly interested in the following keys:

  • variant (i.e. T:C)
  • ad
  • vrf
  • nobs (number of observations)
Adjusting for null observations

The nobs value indicates how many observations were made. Ideally this would have been represented in the ad and vrf arrays, but it's left as an exercise for the reader.

Binning VRF Data

The vrf (variant read frequency) array in the JSON object above is paired with the ad array (allele depths) shown above.

The data in the JSON object has an excessive number of significant digits. This means that as the number of samples increases, this array will grow. To make this more future-proof, Nirvana bins everything according to 0.1% increments.

With the binned data, we end up having 775 distinct vrf values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143.

Pre-processing the Data

The JSON file is converted into a small TSV file that is embedded in Nirvana. Here is an example of the TSV file:

#CHROM  POS REF ALT VRF_BINS    VRF_COUNTS
chrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736
chrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736

Algorithm

Nirvana will calculate mitochondrial heteroplasmy data for every sample in the VCF. Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile.

Percentiles

Nirvana uses the statistical definition of percentile (indicating the value below which a given percentage of observations in a group of observations falls). Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1).

Download URL

Unavailable

The original data set is only available internally at Illumina at the moment.

JSON Output

"samples":[
{
"genotype":"0/1",
"variantFrequencies":[
0.333,
0.5
],
"alleleDepths":[
10,
20,
30
],
"heteroplasmyPercentile":[
23.13,
12.65
]
}
]
FieldTypeNotes
heteroplasmyPercentilefloat arrayone percentile for each variant frequency (each alternate allele)
- - - - \ No newline at end of file diff --git a/3.14/data-sources/mitomap-small-variants-json/index.html b/3.14/data-sources/mitomap-small-variants-json/index.html deleted file mode 100644 index c58c6e5f..00000000 --- a/3.14/data-sources/mitomap-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -mitomap-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

mitomap-small-variants-json

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele
- - - - \ No newline at end of file diff --git a/3.14/data-sources/mitomap-structural-variants-json/index.html b/3.14/data-sources/mitomap-structural-variants-json/index.html deleted file mode 100644 index 5fdb2e56..00000000 --- a/3.14/data-sources/mitomap-structural-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -mitomap-structural-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

mitomap-structural-variants-json

"mitomap":[ 
{
"chromosome":"MT",
"begin":"3166",
"end":"14152",
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring array
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
- - - - \ No newline at end of file diff --git a/3.14/data-sources/mitomap/index.html b/3.14/data-sources/mitomap/index.html deleted file mode 100644 index 9c8e6d2e..00000000 --- a/3.14/data-sources/mitomap/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -MITOMAP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

MITOMAP

Overview

MITOMAP provides a compendium of polymorphisms and mutations in human mitochondrial DNA.

Publication

Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. Current Protocols in Bioinformatics 1(123):1.23.1-26 (2013). http://www.mitomap.org

Scraping HTML Pages

Example

MITOMAP is unique in that it doesn't offer the data in a downloadable format. As a result, the annotation content in Nirvana is scraped from the following MITOMAP pages:

  1. mtDNA Control Region Sequence Variants
  2. mtDNA Coding Region & RNA Sequence Variants
  3. Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations
  4. Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations
  5. Reported mtDNA Deletions
  6. mtDNA Simple Insertions

Parsing

Here's what the HTML code looks like:

["582","<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>","Mitochondrial myopathy","T582C","tRNA Phe","-","+","Reported","<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=582&alt=C&quart=2'><u>72.90%</u></a> <i class='fa fa-arrow-up' style='color:orange' aria-hidden='true'></i></span>","0","<a href='/cgi-bin/print_ref_list?refs=90165,91590&title=RNA+Mutation+T582C' target='_blank'>2</a>"],
["583","<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>","MELAS / MM & EXIT","G583A","tRNA Phe","-","+","Cfrm","<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=583&alt=A&quart=0'><u>93.10%</u></a> <i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i></span>","0","<a href='/cgi-bin/print_ref_list?refs=2066,90532,91590&title=RNA+Mutation+G583A' target='_blank'>3</a>"],

We're mainly interested in the following columns (numbers indicate the HTML page above):

  • Position1,2,3,4
  • Disease3,4
  • Nucleotide Change1,2
  • Allele3,4
  • Homoplasmy3,4
  • Heteroplasmy3,4
  • Status3,4
  • MitoTIP3,4
  • GB Seqs FL(CR)1,2,3,4
  • Deletion Junction5
  • Insert (nt)6
  • Insert Point (nt)6
  • References/Curated References1,2,3,4
MitoTIP

The MitoTIP information is used to populate the clinicalSignificance and scorePercentile JSON keys. The "frequency alert" entries are skipped since they are not directly relevant to clinical significance.

Left alignment

Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions.

Variant Enumeration

Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are C-C(2-8) and A-AC or ACC. Alternate alleles containing IUPAC ambiguity codes are similarly enumerated.

Inversions

MITOMAP inversions are currently treated as MNVs.

Allele Parsing

The following MITOMAP allele parsing conventions are supported:

  • C123T
  • 16021_16022del
  • 8042del2
  • C9537insC
  • 3902_3908invACCTTGC
  • A-AC or ACC
  • C-C(2-8)
  • 8042delAT

PostgreSQL Dump File

Example

COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;
1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177
2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534

Parsing

From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:

  • id
  • nlmid
Why not use the PostgreSQL file for everything?

Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in.

Known Issues

Duplicated records

Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown.

  • For diseases and PubMed IDs, we take the union of the values in the duplicated records.
  • For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.
Skipped records

Records that represent an alternate notation of the original variant are skipped. Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped.

Download URLs

JSON Output

Small Variants

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele

Structural Variants

"mitomap":[ 
{
"chromosome":"MT",
"begin":"3166",
"end":"14152",
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring array
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
- - - - \ No newline at end of file diff --git a/3.14/data-sources/omim-json/index.html b/3.14/data-sources/omim-json/index.html deleted file mode 100644 index 3baf24fd..00000000 --- a/3.14/data-sources/omim-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -omim-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

omim-json

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping
- - - - \ No newline at end of file diff --git a/3.14/data-sources/omim/index.html b/3.14/data-sources/omim/index.html deleted file mode 100644 index 50c12abc..00000000 --- a/3.14/data-sources/omim/index.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - -OMIM | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

OMIM

Overview

OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily.

Publications

Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: 30445645.

Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM®), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. PMID: 25428349.

Parse OMIM data

Nirvana uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Nirvana. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to a Nirvana gene symbol are further processed. The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols.

mim2gene.txt

This mim2gene.txt (http://omim.org/static/omim/data/mim2gene.txt) file provides the mapping between MIM numbers and gene symbols. An example of this file is given below:

# MIM Number    MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq)   Entrez Gene ID (NCBI)   Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)
100050 predominantly phenotypes
100070 phenotype 100329167
100100 phenotype
100200 predominantly phenotypes
100300 phenotype
100500 moved/removed
100600 phenotype
100640 gene 216 ALDH1A1 ENSG00000165092
100650 gene/phenotype 217 ALDH2 ENSG00000111275
100660 gene 218 ALDH3A1 ENSG00000108602
100670 gene 219 ALDH1B1 ENSG00000137124
100675 predominantly phenotypes
100678 gene 39 ACAT2 ENSG00000120437

The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns is used to find the proper gene symbol supported by Nirvana, which may or may not be the same as the gene symbol listed here.

OMIM API

Nirvana retrieves the OMIM annotations from the OMIM API JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. A sample JSON response from the API is provided below.

{
"omim": {
"version": "1.0",
"entryList": [
{
"entry": {
"prefix": "*",
"mimNumber": 100640,
"status": "live",
"titles": {
"preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",
"alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\nACETALDEHYDE DEHYDROGENASE 1;;\nALDH, LIVER CYTOSOLIC;;\nRETINAL DEHYDROGENASE 1; RALDH1"
},
"textSectionList": [
{
"textSection": {
"textSectionName": "description",
"textSectionTitle": "Description",
"textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\n\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."
}
}
],
"geneMap": {
"sequenceID": 7709,
"chromosome": 9,
"chromosomeSymbol": "9",
"chromosomeSort": 225,
"chromosomeLocationStart": 72900670,
"chromosomeLocationEnd": 72953052,
"transcript": "ENST00000297785.7",
"cytoLocation": "9q21",
"computedCytoLocation": "9q21.13",
"mimNumber": 100640,
"geneSymbols": "ALDH1A1",
"geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",
"mappingMethod": "REa, A",
"confidence": "P",
"mouseGeneSymbol": "Aldh1a1",
"mouseMgiID": "MGI:1353450",
"geneInheritance": null
},
"externalLinks": {
"geneIDs": "216",
"hgncID": "402",
"ensemblIDs": "ENSG00000165092,ENST00000297785.8",
"approvedGeneSymbols": "ALDH1A1",
"ncbiReferenceSequences": "1519246465",
"proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",
"uniGenes": "Hs.76392",
"swissProtIDs": "P00352",
"decipherGene": false,
"umlsIDs": "C1412333",
"gtr": true,
"cmgGene": false,
"keggPathways": true,
"gwasCatalog": false,

}
}
},
{
"entry": {
"prefix": "*",
"mimNumber": 102560,
"status": "live",
"titles": {
"preferredTitle": "ACTIN, GAMMA-1; ACTG1",
"alternativeTitles": "ACTIN, GAMMA; ACTG;;\nCYTOSKELETAL GAMMA-ACTIN;;\nACTIN, CYTOPLASMIC, 2"
},
"textSectionList": [
{
"textSection": {
"textSectionName": "description",
"textSectionTitle": "Description",
"textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."
}
}
],
"geneMap": {
"sequenceID": 13666,
"chromosome": 17,
"chromosomeSymbol": "17",
"chromosomeSort": 947,
"chromosomeLocationStart": 81509970,
"chromosomeLocationEnd": 81512798,
"transcript": "ENST00000331925.7",
"cytoLocation": "17q25.3",
"computedCytoLocation": "17q25.3",
"mimNumber": 102560,
"geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",
"geneName": "Actin, gamma-1",
"mappingMethod": "REa, A, Fd",
"confidence": "C",
"mouseGeneSymbol": "Actg1",
"mouseMgiID": "MGI:87906",
"geneInheritance": null,
"phenotypeMapList": [
{
"phenotypeMap": {
"mimNumber": 102560,
"phenotype": "Baraitser-Winter syndrome 2",
"phenotypeMimNumber": 614583,
"phenotypicSeriesNumber": "PS243310",
"phenotypeMappingKey": 3,
"phenotypeInheritance": "Autosomal dominant"
}
},
{
"phenotypeMap": {
"mimNumber": 102560,
"phenotype": "Deafness, autosomal dominant 20/26",
"phenotypeMimNumber": 604717,
"phenotypicSeriesNumber": "PS124900",
"phenotypeMappingKey": 3,
"phenotypeInheritance": "Autosomal dominant"
}
}
]
}
}
}
]
}
}

Content from the OMIM API JSON response is reorganized as shown in the Nirvana JSON Output

Mappings between the Nirvana JSON output and OMIM JSON API are listed in the table below:

Nirvana JSON key chainOMIM API JSON key chain
omim:mimNumberomim:entryList:entry:mimNumber
omim:geneNameomim:entryList:entry:geneMap:geneName
omim:descriptionomim:entryList:entry:textSectionList:textSection:textSectionContent
omim:phenotypes:mimNumberomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber
omim:phenotypes:phenotypeomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype
omim:phenotypes:descriptionomim:entryList:entry:textSectionList:textSection:textSectionContent
omim:phenotypes:mappingomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (see mapping below)
omim:phenotypes:inheritancesomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance
omim:phenotypes:commentsomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (see mapping below)

Mapping key to content

1 to disorder was positioned by mapping of the wild type gene
-2 to disease phenotype itself was mapped
-3 to molecular basis of the disorder is known
-4 to disorder is a chromosome deletion or duplication syndrome

Phenotype character to comment

? to unconfirmed or possibly spurious mapping
-[/] to nondiseases
-{/} to contribute to susceptibility to multifactorial disorders or to susceptibility to infection

There are different types of links in the OMIM description section. For example, in the above JSON response, we have the description of MIM entry 100640:

The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\n\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).

As the descriptions will be shown as plain text, we remove the curly brackets surrounding links and try to keep the text readable with minimal modifications. Briefly:

  • Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.
  • Links referring to a literature reference will be processed to remove the internal index and curly brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".
  • All the other links will simply have their curly brackets removed. For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".
  • If the content within a pair of parentheses becomes empty after being processed, the parentheses need to be removed as well and its surrounding white spaces should be properly processed. For example, "ALDH2 ({100650})," will become "ALDH2,".

Here is a list of examples showing how the description section is supposed to be processed:

Original textProcessed text
({516030}, {516040}, and {516050})
(e.g., D1, {168461}; D2, {123833}; D3, {123834})(e.g., D1; D2; D3)
(desmocollins; see DSC2, {125645})(desmocollins; see DSC2)
(e.g., see {102700}, {300755})
(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})(ADH). See also liver mitochondrial ALDH2
(see, e.g., CACNA1A; {601011})(see, e.g., CACNA1A)
(e.g., GSTA1; {138359}), mu (e.g., {138350})(e.g., GSTA1), mu
(NFKB; see {164011})(NFKB)
(see ISGF3G, {147574})(see ISGF3G)
(DCK; {EC 2.7.1.74}; {125450})(DCK; EC 2.7.1.74)

JSON output

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping
- - - - \ No newline at end of file diff --git a/3.14/data-sources/phylop-json/index.html b/3.14/data-sources/phylop-json/index.html deleted file mode 100644 index fbbef01f..00000000 --- a/3.14/data-sources/phylop-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -phylop-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

phylop-json

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"phylopScore":0.459
}
]
FieldTypeNotes
phylopScorefloatrange: -14.08 to 6.424
- - - - \ No newline at end of file diff --git a/3.14/data-sources/phylop/index.html b/3.14/data-sources/phylop/index.html deleted file mode 100644 index 5ed2ca8f..00000000 --- a/3.14/data-sources/phylop/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -PhyloP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

PhyloP

Overview

PhyloP (phylogenetic p-values) conservation scores are obtained from the [PHAST package](http://compgen.bscb.cornell.edu/phast/) for multiple alignments of vertebrate genomes to the human genome. For GRCh38, the multiple alignments are against 19 mammals and for GRCh37, they are against 45 vertebrate genomes.

Publication

Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. Genome Res. 2005 Aug;15(8):1034-50. (http://www.genome.org/cgi/doi/10.1101/gr.3715005)

WigFix File

The data is provided in WigFix files, which are text files that provide conservation scores for contiguous intervals in the following format:

fixedStep chrom=chr1 start=10918 step=1
0.064
0.058
0.064
0.058
0.064
0.064
fixedStep chrom=chr1 start=34045 step=1
0.111
0.100
0.111
0.111
0.100
0.111
0.111
0.111
0.100
0.111
-1.636

We convert them to binary files with indexes for fast query. Note that these are scores for genomic positions and are reported only for SNVs.

Download URL

GRCh37: http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/

GRCh38: http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/

JSON Output

Unlike other supplementary data sources, phyloP scores are reported in the variants section.

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"phylopScore":0.459
}
]
FieldTypeNotes
phylopScorefloatrange: -14.08 to 6.424
- - - - \ No newline at end of file diff --git a/3.14/data-sources/primate-ai-json/index.html b/3.14/data-sources/primate-ai-json/index.html deleted file mode 100644 index 529507fe..00000000 --- a/3.14/data-sources/primate-ai-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -primate-ai-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

primate-ai-json

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.14/data-sources/primate-ai/index.html b/3.14/data-sources/primate-ai/index.html deleted file mode 100644 index a1e1f886..00000000 --- a/3.14/data-sources/primate-ai/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Primate AI | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

Primate AI

Overview

Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations. The method is described in the publication:

Publication

Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. Nat Genet 50, 1161–1170 (2018). https://doi.org/10.1038/s41588-018-0167-z

TSV File

Example

chr pos ref alt refAA   altAA   strand_1pos_0neg    trinucleotide_context   UCSC_gene   ExAC_coverage   primateDL_score
chr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239
chr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546

Parsing

From the TSV file, we're mainly interested in the following columns:

  • chr
  • pos
  • ref
  • alt
  • primateDL_score

We also use UCSC_gene to filter out variants that don't have matching gene models in Nirvana.

Pre-processing

Converting UCSC IDs

Primate AI only provides UCSC IDs. As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs.

The following queries are used to download the conversions from UCSC:

mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \
-e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv

mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \
-e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \
hg19 > ucsc_ensembl.tsv

Running the Pre-Processor

The Primate AI pre-processor can be run as follows:

dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \
ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz

During conversion, 0.5% of the UCSC Ids cannot be converted to either Entrez or Ensembl gene IDs. Once the gene IDs have been acquired, we check to see which are available in Nirvana.

The following Entrez Gene IDs were not found:

399753
401980
504189
504191
100293534

Here is the output from the pre-processor:

- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.
- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.
- loading UGA gene ID to gene dictionary... 103,277 genes loaded.
- parsing Primate AI variants... 70,121,953 variants parsed.

# variants with unknown gene ID: 27,253 / 70,121,953
# genes with unknown gene ID: 109 / 19,614

# variants not in UGA: 2,036 / 70,121,953
# genes not in UGA: 6 / 19,614

Known Issues

Known Issues

The Primate AI data set provides raw scores, but the scores are biased according to gene context. I.e. a 0.4 means something different in TP53 than it does in KRAS.

As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. According to their research, the 25th percentile is a good proxy for benign variants and the 75th percentile is a good proxy for pathogenic variants.

Download URL

https://basespace.illumina.com/s/cPgCSmecvhb4

JSON Output

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.14/data-sources/revel-json/index.html b/3.14/data-sources/revel-json/index.html deleted file mode 100644 index bf67bb72..00000000 --- a/3.14/data-sources/revel-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -revel-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

revel-json

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.14/data-sources/revel/index.html b/3.14/data-sources/revel/index.html deleted file mode 100644 index c20ffe1a..00000000 --- a/3.14/data-sources/revel/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -REVEL | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

REVEL

Overview

REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons.

Publication

Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. The American Journal of Human Genetics 99, 877-885 (2016). https://doi.org/10.1016/j.ajhg.2016.08.016

CSV File

Example

chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL
1,35142,35142,G,A,T,M,0.027
1,35142,35142,G,C,T,R,0.035
1,35142,35142,G,T,T,K,0.043
1,35143,35143,T,A,T,S,0.018
1,35143,35143,T,C,T,A,0.034

Parsing

From the CSV file, we're mainly interested in the following columns:

  • chr
  • hg19_pos
  • grch38_pos
  • ref
  • alt
  • REVEL

Known Issues

Sorting

Since the input file contains positions for both GRCh37 and GRCh38, we split it into two TSV files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file.

Conflicting Scores

When there are multiple scores available for the same variant (i.e. the same position with the same alternative allele), we pick the highest score.

Download URL

https://sites.google.com/site/revelgenomics/downloads

JSON Output

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.14/data-sources/splice-ai-json/index.html b/3.14/data-sources/splice-ai-json/index.html deleted file mode 100644 index e9afc5e1..00000000 --- a/3.14/data-sources/splice-ai-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -splice-ai-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

splice-ai-json

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place
- - - - \ No newline at end of file diff --git a/3.14/data-sources/splice-ai/index.html b/3.14/data-sources/splice-ai/index.html deleted file mode 100644 index 9afd2b10..00000000 --- a/3.14/data-sources/splice-ai/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Splice AI | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

Splice AI

Overview

SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence.

Publication

K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. Cell, 176 (3) (2019), pp. 535-548 e24

VCF File

Example

##fileformat=VCFv4.0
##assembly=GRCh37/hg19
##INFO=<ID=SYMBOL,Number=1,Type=String,Description="HGNC gene symbol">
##INFO=<ID=STRAND,Number=1,Type=String,Description="+ or - depending on whether the gene lies in the positive or negative strand">
##INFO=<ID=TYPE,Number=1,Type=String,Description="E or I depending on whether the variant position is exonic or intronic (GENCODE V24lift37 canonical annotation)">
##INFO=<ID=DIST,Number=1,Type=Integer,Description="Distance between the variant position and the closest splice site (GENCODE V24lift37 canonical annotation)">
##INFO=<ID=DS_AG,Number=1,Type=Float,Description="Delta score (acceptor gain)">
##INFO=<ID=DS_AL,Number=1,Type=Float,Description="Delta score (acceptor loss)">
##INFO=<ID=DS_DG,Number=1,Type=Float,Description="Delta score (donor gain)">
##INFO=<ID=DS_DL,Number=1,Type=Float,Description="Delta score (donor loss)">
##INFO=<ID=DP_AG,Number=1,Type=Integer,Description="Delta position (acceptor gain) relative to the variant position">
##INFO=<ID=DP_AL,Number=1,Type=Integer,Description="Delta position (acceptor loss) relative to the variant position">
##INFO=<ID=DP_DG,Number=1,Type=Integer,Description="Delta position (donor gain) relative to the variant position">
##INFO=<ID=DP_DL,Number=1,Type=Integer,Description="Delta position (donor loss) relative to the variant position">
#CHROM POS ID REF ALT QUAL FILTER INFO
10 92946 . C T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35
10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1
10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21
10 92947 . A C . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34
10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34
10 92947 . A G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32

Parsing

From the VCF file, we're mainly interested in the following columns:

  • DS_AG - Δ score (acceptor gain)
  • DS_AL - Δ score (acceptor loss)
  • DS_DG - Δ score (donor gain)
  • DS_DL - Δ score (donor loss)
  • DP_AG - Δ position (acceptor gain) relative to the variant position
  • DP_AL - Δ position (acceptor loss) relative to the variant position
  • DP_DG - Δ position (donor gain) relative to the variant position
  • DP_DL - Δ position (donor loss) relative to the variant position

The Splice AI team suggests the following interpretation for the scores:

RangeConfidencePathogenicity
0 ≤ x < 0.1lowlikely benign
0.1 ≤ x ≤ 0.5mediumlikely pathogenic
x > 0.5highpathogenic

Pre-processing

Filtering

Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value. I.e. observing low splice scores in intergenic regions. Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed.

As a result, Nirvana filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism.

Download URL

https://basespace.illumina.com/s/5u6ThOblecrh

JSON Output

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place
- - - - \ No newline at end of file diff --git a/3.14/file-formats/custom-annotations/index.html b/3.14/file-formats/custom-annotations/index.html deleted file mode 100644 index 72904564..00000000 --- a/3.14/file-formats/custom-annotations/index.html +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - -Custom Annotations | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

Custom Annotations

Overview

While the team tries to keep data sources up-to-date, you might want to start incorporating new annotations ahead of our update cycle. Another -common use case involves protected health information (PHI). Custom annotations are a mechanism that enables both use cases.

Here are some examples of how our collaborators use custom annotations:

  • associating context from both a patient-level and a patient cohort level with the variant annotations
  • adding content that is licensed (e.g. HGMD) to the variant annotations

At the moment, we have two different custom annotation file formats. One provides additional annotations to variants (both small variants and SVs) -while the other caters to gene annotations.

In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data.

The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how -Nirvana should match the variants.

At Illumina, there are usually many components downstream of Nirvana that have to parse our annotations. If a customer provides a custom -annotation, those downstream tools need to understand more about the data such as:

  • data type (e.g. number, boolean, or a string)
  • data category (e.g. is this an allele count, allele number, allele frequency, etc.)
  • associated population (i.e. if this is an allele frequency)

For each custom annotation, Nirvana uses this context to create a JSON schema that can be sent to downstream tools. If -a tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of [0, 1].

Variant File Format

Basic Allele Frequency Example

Create the Custom Annotation TSV

Imagine that you want to create a basic allele frequency custom annotation for small variants. If we visualized the tab-delimited file -(TSV), it would look something like this:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTallAf
#categories...AlleleFrequency
#descriptions...ALL
#type...number
chr1623603511TGAT0.000006579
chr1668801894GA0.000006569
chr1911107436GA0.00003291

Here's the full TSV file.

Let's go over the header and discuss the contents:

  • title indicates the name of the JSON key
  • assembly indicates that this data is only valid for GRCh38
  • matchVariantsBy indicates that we should only match the annotations if they are allele-specific
  • categories provides hints to downstream tools on how they might want to treat the data. In this case, we indicate that it's an allele -frequency.
  • descriptions are used in special circumstances to provide more context. Even though column 5 is called allAf, it might not be clear to a -downstream tool that this means a global allele frequency using all sub-populations. In this case, ALL indicates the intended population.
  • type indicates to downstream tools the data type. Since allele frequencies are numbers, we'll write number in this column.
Reference Base Checking

Nirvana validates all the reference bases in a custom annotation. If a variant or genomic region is specified that has the wrong reference base, an error will be produced.

Sorting

The variants within each chromosome must be sorted by genomic position.

Convert to Nirvana Format

First we need to convert the TSV file to Nirvana's native file format and let's put that file in a new directory called CA:

$ mkdir CA
$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \
-r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA
---------------------------------------------------------------------------
SAUtils (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Chromosome 16 completed in 00:00:00.1
Chromosome 19 completed in 00:00:00.0

Time: 00:00:00.2

Annotate with Nirvana

Let's annotate the following VCF (notice that it's one of the variants that we have in our custom annotation):

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 68801894 . G A . . .

Here's the full VCF file.

Since Nirvana can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to -the normal Nirvana command-line.

$ dotnet bin/Release/netcoreapp2.1/Nirvana.dll -c Data/Cache/GRCh38/Both \
-r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \
--sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA
---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:01.8
SA Position Scan 00:00:00.0 19

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
chr16 00:00:00.2 00:00:01.3 1

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:01.9 25.5 %
Preload 00:00:00.2 3.3 %
Annotation 00:00:01.3 18.2 %

Time: 00:00:06.3

Investigate the Results

We would expect the following data to show up in our JSON output file:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": {
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06
},
"clinvar": [

Here's the full JSON file.

Nirvana preserves up to 6 decimal places for allele frequency data.

Categories & Descriptions Example

Create the Custom Annotation TSV

Building on the previous example, we can add other types of annotations like predictions and general notes.

Col 1Col 2Col 3Col 4Col 5Col 6Col 7
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTallAfpathogenicitynotes
#categories...AlleleFrequencyPrediction.
#descriptions...ALL..
#type...numberstringstring
chr1623603511TGAT0.000006579P.
chr1668801894GA0.000006569LPSeen in case 123
chr1911107436GA0.00003291..

Here's the full TSV file.

Placeholders

You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). While -Nirvana also accepts empty columns in the TSV file, we use them in these examples to promote readability.

Let's go over what's new in this example:

  • Column 6 adds a field called pathogenicity which uses the Prediction category. When using this category, Nirvana will -validate to make -sure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic).
  • Column 7 adds a field called notes and it doesn't have a category or description. We're just going to use it to add some internal -notes.

Annotate with Nirvana

Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the -alternate allele (allele-specific match):

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 23603511 . TG T . . .
16 68801894 . G A . . .
19 11107436 . G C . . .

Here's the full VCF file.

Investigate the Results

Because we specified #matchVariantsBy=allele in our custom annotation file, only the middle variant will get an annotation:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": {
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06,
"pathogenicity": "LP",
"notes": "Seen in case 123"
},
"clinvar": [

Here's the full JSON file.

Using Positional Matches

What would happen if we changed to #matchVariantsBy=position? Two things will happen. First, our positional variants will now match:

      "variants": [
{
"vid": "16-23603511-TG-T",
"chromosome": "16",
"begin": 23603512,
"end": 23603512,
"refAllele": "G",
"altAllele": "-",
"variantType": "deletion",
"hgvsg": "NC_000016.10:g.23603512delG",
"MyDataSource": [
{
"refAllele": "GA",
"altAllele": "-",
"allAf": 7e-06,
"pathogenicity": "P"
}
],
"clinvar": [

In addition, you will now see an extra flag for our allele-specific variant:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": [
{
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06,
"pathogenicity": "LP",
"notes": "Seen in case 123",
"isAlleleSpecific": true
}
],
"clinvar": [

Genomic Region Example

Create the Custom Annotation TSV

In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. Consider the following example:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFENDnotes
#categories....
#descriptions....
#type...string
chr1620000000T70000000Lots of false positives in this region

Here's the full TSV file.

Let's go over what's new in this example:

  • Column 5 now has a field called notes. In essence, it looks exactly like column 7 from our previous example.
  • The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.

In the previous example we learned about positional matching vs allele-specific matching. For genomic regions, #matchVariantsBy=allele and #matchVariantsBy=position produce -the same result.

Annotate with Nirvana

Let's use the same VCF file as our previous example.

Investigate the Results

    {
"chromosome": "16",
"position": 23603511,
"refAllele": "TG",
"altAlleles": [
"T"
],
"cytogeneticBand": "16p12.2",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0,
"annotationOverlap": 0
}
],
"variants": [

Here's the full JSON file.

Reciprocal & Annotation Overlap

For all intervals, Nirvana internally calculates two overlaps: a variant overlap and an annotation overlap. Variant overlap is the percentage of the variant's length that is -overlapped. Annotation overlap is the percentage of the annotation's length that is overlapped.

Reciprocal overlap is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is 1 bp, both overlaps will be pretty close to 0.

We will also see this annotation for the other variant on chr16:

    {
"chromosome": "16",
"position": 68801894,
"refAllele": "G",
"altAlleles": [
"A"
],
"cytogeneticBand": "16q22.1",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0,
"annotationOverlap": 0
}
],
"variants": [
Targeting Structural Variants

Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To -force Nirvana to match regions only to other SVs, use the #matchVariantsBy=sv option in the header.

Mixing Small Variants and Genomic Regions

Create the Custom Annotation TSV

Previously we looked at examples that either had small variants or genomic regions. Let's create a file that contains both:

Col 1Col 2Col 3Col 4Col 5Col 6
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTENDnotes
#categories.....
#descriptions.....
#type....string
chr1623603511TGAT..
chr1668801894GA..
chr1911107436GA..
chr2110510818C.10699435Interval #1
chr2110510818C<DEL>10699435Interval #2
chr2212370388TT[chr22:12370729[.Known false-positive

Here's the full TSV file.

Let's go over what's new in this example:

  • Column 4 now has the ALT field. Except for the case listed below, this is only used by small variants or translocation breakends.
  • Column 5 now has the END field. This is only used by genomic regions.
  • There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? Interval #2 has a symbolic allele in the ALT column. When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). When Nirvana matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.

Annotate with Nirvana

Let's use a new VCF file to study how matching works for intervals #1 and #2:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
21 10510818 . C <DUP> . . END=10699435;SVTYPE=DUP
22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND

Here's the full VCF file.

The first variant is similar to the custom annotation labelled "interval #2". Position 10510818 is the padding base, so it effectively starts at position 10510819.

Investigate the Results

  "positions": [
{
"chromosome": "21",
"position": 10510818,
"svEnd": 10699435,
"refAllele": "C",
"altAlleles": [
"<DUP>"
],
"cytogeneticBand": "21p11.2",
"MyDataSource": [
{
"start": 10510818,
"end": 10699435,
"notes": "Interval #1",
"reciprocalOverlap": 0.99999,
"annotationOverlap": 0.99999
},
{
"start": 10510819,
"end": 10699435,
"notes": "Interval #2",
"reciprocalOverlap": 1,
"annotationOverlap": 1
}
],

Here's the full JSON file.

As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. Interval #1 technically starts 1 bp earlier, so its overlap is 99.999%.

Further down the JSON file, we find the annotated translocation breakend:

      "variants": [
{
"vid": "22-12370388-T-T[chr22:12370729[",
"chromosome": "22",
"begin": 12370388,
"end": 12370388,
"isStructuralVariant": true,
"refAllele": "T",
"altAllele": "T[chr22:12370729[",
"variantType": "translocation_breakend",
"MyDataSource": {
"refAllele": "T",
"altAllele": "T[chr22:12370729[",
"notes": "Known false-positive"
}
}

Gene File Format

Basic Gene Example

Create the Custom Annotation TSV

Previously we looked at examples that either had small variants or genomic regions, however, sometimes we would like to add custom gene annotations. The gene custom annotation file format -looks slightly different:

Col 1Col 2Col 3Col 4
#title=MyDataSource
#geneSymbolgeneIdphenotypenotes
#categories...
#descriptions...
#type.stringstring
TP537157Colorectal cancer, hereditary nonpolyposis, type 5.
KRASENSG00000133703Mismatch repair cancer syndromeSeen in cohort 123

Here's the full TSV file.

Let's go over what's in this example:

  • Column 2 has the geneId field. This can be either an Entrez Gene ID or an Ensembl ID.
Gene Symbols

Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Nirvana uses the geneId to match genes rather than the gene symbol. However, to -make the custom annotation files easier to read, we've included the geneSymbol column as well.

Unknown Gene IDs

When Nirvana parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Nirvana. In such a case, Nirvana will display an error showing all the -unrecognized gene IDs.

Annotate with Nirvana

Let's use a VCF file that contains variants in TP53 and KRAS:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
12 25227255 . A T . . .
17 7675074 . C A . . .

Here's the full VCF file.

Investigate the Results

  "genes": [
{
"name": "KRAS",
"clingenGeneValidity": [
{
"diseaseId": "MONDO_0009026",
"disease": "Costello syndrome",
"classification": "disputed",
"classificationDate": "2018-07-24"
}
],
"clingenDosageSensitivityMap": {
"haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"
},
"gnomAD": {
"pLi": 0.000788,
"pRec": 0.789,
"pNull": 0.21,
"synZ": 0.336,
"misZ": 2.32,
"loeuf": 1.24
},
"MyDataSource": {
"phenotype": "Mismatch repair cancer syndrome",
"notes": "Seen in cohort 123"
}
},

This is the abbreviated output for KRAS. Here's the full JSON file if you want to see the complete KRAS entry.

Customizing the Header

Title

For the title, you can provide any string that hasn't already been used. The title should be unique.

caution

Make sure that the title does not conflict with other keys in the JSON file.

For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be vid, chromosome, transcripts, etc. The title should also not conflict with other data source keys like clinvar or gnomad.

For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be chromosome, svLength, cytogeneticBand, etc. The title should also not conflict with other data source keys like clingen or dgv.

caution

Care should be taken not to annotate using multiple custom annotations that all use the same title.

Genome Assemblies

The following genome assemblies can be specified:

  • GRCh37
  • GRCh38

Matching Criteria

The matching criteria instruct Nirvana how to match a VCF variant to the custom annotation.

The following matching criteria can be specified:

  • allele - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like gnomAD
  • position - use this when you want positional matches. This is commonly used with disease phenotype data sources like ClinVar
  • sv - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline copy number intervals along the genome.

Categories

Categories are not used by Nirvana, but are often used by downstream tools. Categories provide hints for how those tools should filter or display the annotation data.

When a category is specified, Nirvana will provide additional validation for those fields. The following table describes each category:

CategoryDescriptionValidation
AlleleCountallele counts for a specific populationSee the supported populations below
AlleleNumberallele numbers for a specific populationSee the supported populations below
AlleleFrequencyallele frequencies for a specific populationSee the supported populations below
PredictionACMG-style pathogenicity classificationsbenign (B)
likely benign (LB)
VUS
likely pathogenic (LP)
pathogenic (P)
Filterfree text that signals downstream tools to add the column to the filterMax 20 characters
Descriptionfree-text descriptionMax 100 characters
Identifierany IDMax 50 characters
HomozygousCountcount of homozygous individuals for a specific populationSee the supported populations below
Scoreany score valueAny double-precision floating point number

Descriptions

Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations.

Populations

The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD.

Population CodeSuper-population CodeDescription
ACBAFRAfrican Caribbeans in Barbados
AFRAFRAfrican
ALLALLAll populations
AMRAMRAd Mixed American
ASJAshkenazi Jewish
ASWAFRAmericans of African Ancestry in SW USA
BEBSASBengali from Bangladesh
CDXEASChinese Dai in Xishuangbanna, China
CEUEURUtah Residents (CEPH) with Northern and Western European Ancestry
CHBEASHan Chinese in Beijing, China
CHSEASSouthern Han Chinese
CLMAMRColombians from Medellin, Colombia
EASEASEast Asian
ESNAFREsan in Nigeria
EUREUREuropean
FINEURFinnish in Finland
GBREURBritish in England and Scotland
GIHSASGujarati Indian from Houston, Texas
GWDAFRGambian in Western Divisions in the Gambia
IBSEURIberian population in Spain
ITUSASIndian Telugu from the UK
JPTEASJapanese in Tokyo, Japan
KHVEASKinh in Ho Chi Minh City, Vietnam
LWKAFRLuhya in Webuye, Kenya
MAGAFRMandinka in the Gambia
MKKAFRMaasai in Kinyawa, Kenya
MSLAFRMende in Sierra Leone
MXLAMRMexican Ancestry from Los Angeles, USA
NFEEUREuropean (Non-Finnish)
OTHOTHOther
PELAMRPeruvians from Lima, Peru
PJLSASPunjabi from Lahore, Pakistan
PURAMRPuerto Ricans from Puerto Rico
SASSASSouth Asian
STUSASSri Lankan Tamil from the UK
TSIEURToscani in Italia
YRIAFRYoruba in Ibadan, Nigeria

Data Types

Each custom annotation can be one of the following data types:

  • bool - true or false
  • number - any integer or floating-point number
  • string - text
tip

For boolean variables, only keys with a true value will be output to the JSON object.

Using SAUtils

Nirvana includes a tool called SAUtils that converts various data sources into Nirvana's native binary format. The sub-commands customvar and customgene are used to specify a variant file or a gene file respectively.

Convert Variant File

dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i MyDataSource.tsv \
-o SupplementaryAnnotation
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input TSV path
  • the -o argument specifies the output directory

Convert Gene File

dotnet bin/Release/netcoreapp2.1/SAUtils.dll customgene \
--uga Nirvana_UGA.tsv \
-i MyDataSource.tsv \
-o SupplementaryAnnotation
  • the --uga argument specifies the Nirvana universal gene archive (UGA) path
  • the -i argument specifies the input TSV path
  • the -o argument specifies the output directory
- - - - \ No newline at end of file diff --git a/3.14/file-formats/nirvana-json-file-format/index.html b/3.14/file-formats/nirvana-json-file-format/index.html deleted file mode 100644 index 207d2278..00000000 --- a/3.14/file-formats/nirvana-json-file-format/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Nirvana JSON File Format | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

Nirvana JSON File Format

Overview

Conventions

In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. As such, we have several conventions that are useful to know about:

  • With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display "isStructuralVariant":false a few million times when annotating a small variant VCF.
  • When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. Nirvana treats periods like empty or null strings and therefore will not output those entries.

JSON Layout

info

In general, each position corresponds to a row in the original VCF file.

For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section.

{ 
"header":{
"annotator":"Nirvana 3.0.0-alpha.5+g6c52e247",
"creationTime":"2017-06-14 15:53:13",
"genomeAssembly":"GRCh37",
"dataSources":[
{
"name":"OMIM",
"version":"unknown",
"description":"An Online Catalog of Human Genes and Genetic Disorders",
"releaseDate":"2017-05-03"
},
{
"name":"VEP",
"version":"84",
"description":"BothRefSeqAndEnsembl",
"releaseDate":"2017-01-16"
},
{
"name":"ClinVar",
"version":"20170503",
"description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",
"releaseDate":"2017-05-03"
},
{
"name":"phyloP",
"version":"hg19",
"description":"46 way conservation score between humans and 45 other vertebrates",
"releaseDate":"2009-11-10"
}
],
"samples":[
"NA12878",
"NA12891",
"NA12892"
]
},
FieldTypeNotes
annotatorstringthe name of the annotator and the current version
creationTimestringyyyy-MM-dd hh:mm:ss
genomeAssemblystringsee possible values below
schemaVersionintegerincremented whenever the core structure of the JSON file introduces breaking changes
dataVersionstring
dataSourcesobject arraysee Data Source entry below
samplesstring arraythe order of these sample names will be used throughout the JSON file when enumerating samples

Data Source

FieldTypeNotes
namestring
versionstring
descriptionstringoptional description of the data source
releaseDatestringyyyy-MM-dd

Genome Assemblies

  • GRCh37
  • GRCh38
  • hg19
  • SARSCoV2

Positions

"positions":[ 
{
"chromosome":"chr2",
"position":48010488,
"repeatUnit":"GGCCCC",
"refRepeatCount":3,
"svEnd":48020488,
"refAllele":"G",
"altAlleles":[
"A",
"GT"
],
"quality":461,
"filters":[
"PASS"
],
"ciPos":[
-170,
170
],
"ciEnd":[
-175,
175
],
"svLength":1000,
"strandBias":1.23,
"jointSomaticNormalQuality":29,
"cytogeneticBand":"2p16.3",
FieldTypeVariant TypeNotes
chromosomestringallexactly as displayed in the vcf
positionintegerallexactly as displayed in the vcf (1-based notation). Range: 1 - 250 million
repeatUnitstringSTRprovided by ExpansionHunter
refRepeatCountintegerSTRprovided by ExpansionHunter
svEndintegerSV
refAllelestringallexactly as displayed in the vcf
altAllelesstring arrayallexactly as displayed in the vcf
qualityfloatallexactly as displayed in the vcf (Normally an integer, but some variant callers use floating point. Has been observed as high as 500k)
filtersstring arrayallexactly as displayed in the vcf
ciPosinteger arraySV
ciEndinteger arraySV
svLengthintegerSV
strandBiasfloatsmall variantprovided by GATK (from SB)
jointSomaticNormalQualityintegerSVprovided by the Manta variant caller (SOMATICSCORE)
cytogeneticBandstringalle.g. 17p13.1

1000 Genomes (SV)

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.

MITOMAP (SV)

"mitomap":[ 
{
"chromosome":"MT",
"begin":"3166",
"end":"14152",
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring array
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places

Samples

"samples":[
{
"genotype":"0/1",
"variantFrequencies":[
0.333,
0.5
],
"totalDepth":57,
"genotypeQuality":12,
"copyNumber":3,
"repeatUnitCounts":[
10,
20
],
"alleleDepths":[
10,
20,
30
],
"failedFilter":true,
"splitReadCounts":[
10,
20
],
"pairedEndReadCounts":[
10,
20
],
"isDeNovo":true,
"diseaseAffectedStatuses":[
"-"
],
"artifactAdjustedQualityScore":89.3,
"likelihoodRatioQualityScore":78.2,
"heteroplasmyPercentile":[
23.13,
12.65
]
}
]
FieldTypeNotes
genotypestring
variantFrequenciesfloat arrayrange: 0 - 1.0. One value per alternate allele
totalDepthintegernon-negative integer values
genotypeQualityintegernon-negative integer values. Typically maxes out at 99
copyNumberintegernon-negative integer values
repeatUnitCountsinteger arrayExpansionHunter-specific
alleleDepthsinteger arraynon-negative integer values
failedFilterbool
splitReadCountsinteger arrayManta-specific
pairedEndReadCountsinteger arrayManta-specific
isDeNovobool
diseaseAffectedStatusesstring arrayExpansionHunter-specific
artifactAdjustedQualityScorefloatPEPE-specific. Range: 0 - 100.0
likelihoodRatioQualityScorefloatPEPE-specific. Range: 0 - 100.0
heteroplasmyPercentilefloat arrayrange: 0 - 100. 2 decimal places. One value per alternate allele
Empty Samples

If a sample does not contain any entries, we will create a sample object that contains the isEmpty key. This ensures that sample ordering is preserved while indicating that a sample is intentionally empty.

"samples":[ 
{
"isEmpty":true
}
],

Variants

"variants":[ 
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"isReferenceMinorAllele":true,
"isStructuralVariant":true,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"isDecomposedVariant":true,
"isRecomposedVariant":true,
"linkedVids":["2:48010488:GTA:ATC"],
"hgvsg":"NC_000002.11:g.48010488G>A",
"phylopScore":0.459
FieldTypeNotes
vidstringsee Variant Identifiers
chromosomestring
beginint1-based non-negative integer values. Range: 1 - 250 million
endint1-based non-negative integer values. Range: 1 - 250 million
isReferenceMinorAllelebooltrue when this is a reference minor allele
isStructuralVariantbooltrue when the variant is a structural variant
inLowComplexityRegionbooltrue when the variant lies in a low complexity region (gnomAD low complexity regions)
refAllelestringparsimonious representation of the reference allele
altAllelestringparsimonious representation of the alternate allele.
variantTypestringuses Sequence Ontology sequence alterations
isDecomposedVariantbooltrue when the decomposed variant has been used to create another recomposed variant
isRecomposedVariantbooltrue when the variant is recomposed from two or more decomposed variants
linkedVidsstring arraylist of VIDs for variants connecting decomposed and recomposed variants
hgvsgstringHGVS g. notation
phylopScorefloatphyloP conservation score. Range: -14.08 to 6.424
Reference Minor Alleles

Nirvana supports annotating reference minor alleles. In such a case, refAllele will be replaced by the global major allele and altAllele will be replaced with the original reference allele.

Flagging Decomposed & Recomposed Variants

When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with "isDecomposedVariant":true.

Similarly, the recomposed variant will be shown as a new VCF position. This recomposed variant will be flagged with "isRecomposedVariant":true.

Transcripts

"transcripts":[
{
"transcript":"ENST00000445503.1",
"source":"Ensembl",
"bioType":"nonsense_mediated_decay",
"codons":"gGg/gAg",
"aminoAcids":"G/E",
"cdnaPos":"268",
"cdsPos":"116",
"exons":"1/9",
"introns":"1/8",
"proteinPos":"39",
"geneId":"ENSG00000116062",
"hgnc":"MSH6",
"consequence":[
"missense_variant",
"NMD_transcript_variant"
],
"hgvsc":"ENST00000445503.1:c.116G>A",
"hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",
"geneFusion":{
"exon":6,
"intron":5,
"fusions":[
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",
"exon":3,
"intron":2
},
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",
"exon":2,
"intron":1
}
]
},
"isCanonical":true,
"polyPhenScore":0.95,
"polyPhenPrediction":"probably damaging",
"proteinId":"ENSP00000405294.1",
"siftScore":0.61,
"siftPrediction":"tolerated",
"completeOverlap":true
}
]
FieldTypeNotes
transcriptstringtranscript ID. e.g. ENST00000445503.1
sourcestringRefSeq / Ensembl
bioTypestringdescriptions of the biotypes from Ensembl
codonsstring
aminoAcidsstring
cdnaPosstring
cdsPosstring
exonsstringexons affected by the variant
intronsstringintrons affected by the variant
proteinPosstring
geneIdstringgene ID. e.g. ENSG00000116062
hgncstringgene symbol. e.g. MSH6
consequencestring arraySequence Ontology Consequences
hgvscstringHGVS coding nomenclature
hgvspstringHGVS protein nomenclature
geneFusionobjectsee Gene Fusions entry below
isCanonicalbooltrue when this is a canonical transcript
polyPhenScorefloatrange: 0 - 1.0
polyPhenPredictionstringsee possible values below
proteinIdstringprotein ID. E.g. ENSP00000405294.1
siftScorefloatrange: 0 - 1.0
siftPredictionstringsee possible values below
completeOverlapbooltrue when this transcript is completely overlapped by the variant

PolyPhen

  • probably damaging
  • possibly damaging
  • benign
  • unknown

SIFT

  • tolerated
  • deleterious
  • tolerated - low confidence
  • deleterious - low confidence

Gene Fusions

FieldTypeNotes
exonintactual exon where the breakpoint was located
intronintactual intron where the breakpoint was located
fusionsobject arraysee Fusion entry below

Fusion

FieldTypeNotes
exonintactual exon where the other breakpoint was located
intronintactual intron where the other breakpoint was located
hgvscstringHGVS coding nomenclature describing the two genes and the transcripts that are fused, along with their coding positions

Regulatory Regions

"regulatoryRegions":[ 
{
"id":"ENSR00001542175",
"type":"promoter",
"consequence":[
"regulatory_region_variant"
]
}
]
FieldTypeNotes
idstring
typestringsee possible values below
consequencestring arraysee possible values below

Regulatory Types

  • CTCF_binding_site
  • enhancer
  • open_chromatin_region
  • promoter
  • promoter_flanking_region
  • TF_binding_site

Regulatory Consequences

  • regulatory_region_variant
  • regulatory_region_ablation
  • regulatory_region_amplification
  • regulatory_region_truncation

ClinVar

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity

1000 Genomes

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.

gnomAD

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.

dbSNP

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs

MITOMAP

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele

Primate AI

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0

REVEL

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0

Splice AI

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place

Genes

"genes":[ 
{
"name":"MSH6",
"hgncId":7329,
"summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",
/* this is where gene-level data sources can be found e.g. OMIM */
}
]
FieldTypeNotes
namestringHGNC gene symbol
hgncIdintHGNC ID
summarystringshort description of the gene from OMIM

OMIM

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping

gnomAD LoF Gene Metrics

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)
- - - - \ No newline at end of file diff --git a/3.14/index.html b/3.14/index.html deleted file mode 100644 index 2ecbfe5a..00000000 --- a/3.14/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Introduction | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs (including CNVs)). It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation.

The input to Nirvana is a VCF file and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Nirvana handles multiple alternate alleles and multiple samples with ease.

The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily.

Fun Fact

Nirvana is a backronym for NImble and Robust VAriant aNnotAtor

What does Nirvana annotate?

We use Sequence Ontology consequences to describe how each variant impacts a given transcript:

In addition, we also use external data sources to provide additional context for each variant:

Licensing

Code

Nirvana source code is provided under the GPLv3 license. Nirvana includes several third-party packages provided under other open source licenses; please see Dependencies for additional details.

Data

The data used by Nirvana is publicly available; however, some data sources have special restrictions on use by non-academic entities.

Nirvana Team

Active Team

The Nirvana team works on the core functionality and AWS annotation services, in addition to keeping the annotation data sources up-to-date.

Current members of the Nirvana team are listed in alphabetical order below.

Haochen Li

Active developer. Detail-oriented quick thinker that keeps cool even in the most stressful situations.

Michael Strömberg

Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it.

Rajat Shuvro Roy

Lead developer. Loves to speed up things and make services available to all interested users.

Honorary Alumni

Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things.

Julien Lajugie

Julien is a legend around these parts. When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place.

Shuli Kang

Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies.

Yu Jiang

Biostatistics genius from Duke University before joining our team at Illumina. Now working as a Research Engineer at Facebook AI Research.
- - - - \ No newline at end of file diff --git a/3.14/introduction/covid19/index.html b/3.14/introduction/covid19/index.html deleted file mode 100644 index 9ca957e3..00000000 --- a/3.14/introduction/covid19/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Annotating COVID-19 | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

Annotating COVID-19

The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.

However, nothing in our architecture prevents us from supporting other genomes. Earlier this year, we had an opportunity to put that statement to the test - we added support for annotating the SARS-CoV-2 genome, the virus that causes the COVID-19 disease.

In addition to normal transcript annotation, we also supply:

  • allele frequencies
  • protein domains
SARS-CoV-2 Galaxy Project

The allele frequencies used by Nirvana were provided by the SARS-CoV-2 Galaxy Project. This is an international effort that provides ongoing analysis of COVID-19 using Galaxy, BioConda, and public research infrastructures.

Getting Nirvana

If you don't have Nirvana already, please consult our Getting Started page first.

Downloading the COVID-19 data files

Here's a data zip file containing new gene models, reference, and external data sources for SARS-CoV-2:

Just go to the directory that contains your Nirvana Data directory.

cd ~/Nirvana
curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip
unzip Covid19Data.zip

Download a COVID-19 VCF file

Here's a COVID-19 VCF file you can play around with:

curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz

Running Nirvana

Once you have downloaded the data sets, use the following command to annotate your VCF:

dotnet bin/Release/netcoreapp2.1/Nirvana.dll \
-c Data/Cache/SARS-CoV-2/SARS-CoV-2 \
--sd Data/SupplementaryAnnotation/SARS-CoV-2 \
-r Data/References/SARS-CoV-2.ASM985889v3.dat \
-i Covid19Mutations.vcf.gz \
-o Covid19Mutations
  • the -c argument specifies the cache prefix
  • the --sd argument specifies the supplementary annotation directory
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input VCF path
  • the -o argument specifies the output filename prefix

When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:

---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:00.0
SA Position Scan 00:00:00.0 1763

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
NC_045512 00:00:00.0 00:00:00.1 173

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:00.0 2.0 %
Preload 00:00:00.0 0.3 %
Annotation 00:00:00.1 6.0 %

Time: 00:00:01.5

The output will be a JSON file called Covid19Mutations.json.gz. Here's the full JSON file.

Investigating the Results

Here's an example of what a COVID-19 variant looks like in the JSON output:

{
"chromosome":"NC_045512.2",
"position":27323,
"refAllele":"C",
"altAlleles":[
"T"
],
"filters":[
"PASS"
],
"proteinDomains":[
{
"start":27202,
"end":27384,
"proteinId":"YP_009724394.1",
"domainId":"cl13556",
"domainName":"Sars6 super family",
"reciprocalOverlap":0.00546,
"annotationOverlap":0.00546
}
],
"variants":[
{
"vid":"NC_045512.2-27323-C-T",
"chromosome":"NC_045512.2",
"begin":27323,
"end":27323,
"refAllele":"C",
"altAllele":"T",
"variantType":"SNV",
"hgvsg":"NC_045512.2:g.27323C>T",
"alleleFrequency":{
"refAllele":"C",
"altAllele":"T",
"allAc":8,
"allAn":1058,
"allAf":0.007561
},
"transcripts":[
{
"transcript":"YP_009724394.1",
"source":"RefSeq",
"bioType":"protein_coding",
"codons":"tCt/tTt",
"aminoAcids":"S/F",
"cdnaPos":"122",
"cdsPos":"122",
"exons":"1/1",
"proteinPos":"41",
"geneId":"43740572",
"hgnc":"ORF6",
"consequence":[
"missense_variant"
],
"hgvsc":"YP_009724394.1:c.122C>T",
"hgvsp":"YP_009724394.1:p.(Ser41Phe)",
"proteinId":"YP_009724394.1"
},
{
"transcript":"YP_009724395.1",
"source":"RefSeq",
"bioType":"protein_coding",
"geneId":"43740573",
"hgnc":"ORF7a",
"consequence":[
"upstream_gene_variant"
],
"proteinId":"YP_009724395.1"
}
]
}
]
}
- - - - \ No newline at end of file diff --git a/3.14/introduction/dependencies/index.html b/3.14/introduction/dependencies/index.html deleted file mode 100644 index 306a9a85..00000000 --- a/3.14/introduction/dependencies/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Dependencies | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

Dependencies

All of the following dependencies have been included in this repository.

NameLicenseUsage
Amazon.LambdaApacheAWS extensions for .NET CLI
AWSSDKApacheAWS Lambda, S3, SNS support
Json.NETMITJASIX utility
libdeflateMITBlockCompression library
MoqBSDMocking framework for unit tests
NDesk.OptionsMIT/X11CommandLine library
xUnitApacheUnit testing framework
zlib-ngzlibBlockCompression library
zstdBSDBlockCompression library
- - - - \ No newline at end of file diff --git a/3.14/introduction/getting-started/index.html b/3.14/introduction/getting-started/index.html deleted file mode 100644 index 0576012d..00000000 --- a/3.14/introduction/getting-started/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Getting Started | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.14

Getting Started

Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.

tip

Nirvana currently uses .NET Core 2.1 or later. Please make sure that you have the most current runtime from the .NET Core downloads page.

Quick Start

If you want to get started right away, we've created a script that downloads Nirvana, compiles it, and starts annotating a test file:

curl -O https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh
sh ./TestNirvana.sh

We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X.

Getting Nirvana

Compile from Source

The following will grab the latest version of Nirvana from GitHub and compile it using the .NET Core compiler:

git clone https://github.com/Illumina/Nirvana.git
cd Nirvana
dotnet build -c Release

GitHub Release Notes

Alternatively, you can grab the latest binaries from our GitHub Releases page:

mkdir -p Nirvana/Data
cd Nirvana
unzip Nirvana-3.12.0-dotnet-2.1.0.zip

Docker

You can find us on Docker Hub under annotation/nirvana:

caution

We think Docker is fantastic. However, because our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Nirvana in Docker.

mkdir -p Nirvana/Data
cd Nirvana
docker pull annotation/nirvana:3.9.1

For Docker, we have special instructions for running the Downloader:

sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.9.1 dotnet \
/opt/nirvana/Downloader.dll --ga GRCh37 -o /scratch

Similarly, we have special instructions for running Nirvana (Here's a toy VCF in case you need it):

sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.9.1 dotnet \
/opt/nirvana/Nirvana.dll -c /scratch/Cache/GRCh37/Both \
-r /scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \
--sd /scratch/SupplementaryAnnotation/GRCh37 \
-i /scratch/HiSeq.10000.vcf.gz -o /scratch/HiSeq

Downloading the data files

To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:

dotnet bin/Release/netcoreapp2.1/Downloader.dll \
--ga GRCh37 \
-o Data
  • the --ga argument specifies the genome assembly which can be GRCh37, GRCh38, or both.
  • the -o argument specifies the output directory
Glitches in the Matrix

Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. If you see that a file was marked truncated, try fixing the root cause and running the downloader again.

tip

From time to time, you can re-run the Downloader to get the latest annotation files. It will only download the files that changed.

Download a test VCF file

Here's a toy VCF file you can play around with:

curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz

Running Nirvana

Once you have downloaded the data sets, use the following command to annotate your VCF:

dotnet bin/Release/netcoreapp2.1/Nirvana.dll \
-c Data/Cache/GRCh37/Both \
--sd Data/SupplementaryAnnotation/GRCh37 \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i HiSeq.10000.vcf.gz \
-o HiSeq.10000
  • the -c argument specifies the cache prefix
  • the --sd argument specifies the supplementary annotation directory
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input VCF path
  • the -o argument specifies the output filename prefix

When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:

---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:01.8
SA Position Scan 00:00:00.7 12902

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
chr1 00:00:02.3 00:00:04.5 2176

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:02.6 16.5 %
Preload 00:00:02.3 15.2 %
Annotation 00:00:04.5 29.0 %

Time: 00:00:14.7

The output will be a JSON file called HiSeq.10000.json.gz. Here's the full JSON file.

- - - - \ No newline at end of file diff --git a/3.16/core-functionality/canonical-transcripts/index.html b/3.16/core-functionality/canonical-transcripts/index.html deleted file mode 100644 index c2cf7c6c..00000000 --- a/3.16/core-functionality/canonical-transcripts/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Canonical Transcripts | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Canonical Transcripts

Overview

One of the more polarizing topics within annotation is the notion of canonical transcripts. Because of alternative splicing, we often have several transcripts for each gene. In the human genome, there are an average of 3.4 transcripts per gene (Tung, 2020). As scientists, we seem to have a need for identifying a representative example of a gene - even if there's no biological basis for the motivation.

Golden Helix Blog

A few years ago, the guys over at Golden Helix wrote an excellent post about the pitfalls and issues surrounding the identification of canonical transcripts: What’s in a Name: The Intricacies of Identifying Variants.

In Nirvana, we wanted to identify an algorithm for determining the canonical transcript and apply it consistently to all of our transcript data sources.

Known Algorithms

UCSC

UCSC publishes a list of canonical transcripts in its knownCanonical table which is available via the TableBrowser. Of the RefSeq data sources, it was the only one we could find that provided canonical transcripts:

The canonical transcript is defined as either the longest CDS, if the gene has translated transcripts, or the longest cDNA.

If you were to implement this and compare it with the knownCanonical table, you would see a lot of exceptions to the rule.

Ensembl

The Ensembl glossary states:

The canonical transcript is used in the gene tree analysis in Ensembl and does not necessarily reflect the most biologically relevant transcript of a gene. For human, the canonical transcript for a gene is set according to the following hierarchy:

  1. Longest CCDS translation with no stop codons.
  2. If no (1), choose the longest Ensembl/Havana merged translation with no stop codons.
  3. If no (2), choose the longest translation with no stop codons.
  4. If no translation, choose the longest non-protein-coding transcript.

ACMG

From the ACMG Guidelines for the Interpretation of Sequence Variants:

A reference transcript for each gene should be used and provided in the report when describing coding variants. The transcript should represent either the longest known transcript and/or the most clinically relevant transcript.

ClinVar

From the ClinVar paper:

When there are multiple transcripts for a gene, ClinVar selects one HGVS expression to construct a preferred name. By default, this selection is based on the first reference standard transcript identified by the RefSeqGene/LRG (Locus Reference Genomic) collaboration.

Unified Approach

Our approach is almost identical to the one Golden Helix discussed in their article:

  1. If we're looking at RefSeq, only consider NM & NR transcripts as candidates for canonical transcripts.
  2. Sort the transcripts in the following order:
    1. Locus Reference Genomic (LRG) entries occur before non-LRG entries
    2. Descending CDS length
    3. Descending transcript length
    4. Ascending accession number
  3. Grab the first entry
- - - - \ No newline at end of file diff --git a/3.16/core-functionality/gene-fusions/index.html b/3.16/core-functionality/gene-fusions/index.html deleted file mode 100644 index 0d070b17..00000000 --- a/3.16/core-functionality/gene-fusions/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Gene Fusion Detection | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Gene Fusion Detection

Overview

Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed.

Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana.

The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:

Publication

Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. Landscape of gene fusions in epithelial cancers: seq and ye shall find. Genome Med 7, 129 (2015)

Approach

Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. Let's consider two transcripts, NM_014206.3 (TMEM258) and NM_013402.4 (FADS1). Both of these genes are on the reverse strand in the genome. The vertical bar indicates the breakpoint where these transcripts are fused:

TMEM258 & FADS1 transcripts

The above explains where the transcripts are fused together, but it doesn't explain in which orientation. By using the directionality encoded in the translocation breakend, we can rearrange these two transcripts in four ways:

TMEM258 & FADS1 gene fusions

Only two of the combinations yield a fusion that contains both the transcription start site (TSS) and the stop codon. In one case, we can even detect an in-frame gene fusion.

Interpreting translocation breakends

At first glance, translocation breakends are a bit daunting. However, once you understand how they work, they're actually quite simple. For more information, we recommend reading section 5.4 in the VCF 4.2 specification.

REFALTMeaning
st[p[piece extending to the right of p is joined after t
st]p]reverse comp piece extending left of p is joined after t
s]p]tpiece extending to the left of p is joined before t
s[p[treverse comp piece extending right of p is joined before t

Variant Types

Specifically we can identify gene fusions from the following structural variant types:

  • deletions (<DEL>)
  • tandem_duplications (<DUP:TANDEM>)
  • inversions (<INV>)
  • translocation breakpoints (AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[)

Criteria

The following criteria must be met for Nirvana to identify a gene fusion:

  1. After accounting for gene orientation and genomic rearrangements, both transcripts must have the same orientation
  2. Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)
  3. Both transcripts must belong to different genes
  4. Both transcripts cannot have a coding region that already overlaps without the variant (i.e. in cases where two genes naturally overlap, we don't want to call a gene fusion)

ETV6/RUNX1 Example

ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). Patients with this translocation are associated with a good prognosis and excellent response to treatment.

VCF

Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
chr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND
chr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND
chr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND
chr21 36420865 . C [chr12:12026270[C . PASS SVTYPE=BND

When you put these calls together, the resulting genomic rearrangement looks something like this:

JSON Output

The annotation for the first variant in the VCF looks like this:

{
"chromosome": "chr12",
"position": 12026270,
"refAllele": "C",
"altAlleles": [
"[chr21:36420865[C"
],
"filters": [
"PASS"
],
"cytogeneticBand": "12p13.2",
"clingen": [
{
"chromosome": "12",
"begin": 173786,
"end": 34835837,
"variantType": "copy_number_gain",
"id": "nsv995956",
"clinicalInterpretation": "pathogenic",
"phenotypes": [
"Decreased calvarial ossification",
"Delayed gross motor development",
"Feeding difficulties",
"Frontal bossing",
"Morphological abnormality of the central nervous system",
"Patchy alopecia"
],
"phenotypeIds": [
"HP:0002007",
"HP:0002011",
"HP:0002194",
"HP:0002232",
"HP:0005474",
"HP:0011968",
"MedGen:C0232466",
"MedGen:C1862862",
"MedGen:CN001816",
"MedGen:CN001820",
"MedGen:CN001989",
"MedGen:CN004852"
],
"observedGains": 1,
"validated": true
}
],
"variants": [
{
"vid": "12-12026270-C-[chr21:36420865[C",
"chromosome": "chr12",
"begin": 12026270,
"end": 12026270,
"isStructuralVariant": true,
"refAllele": "C",
"altAllele": "[chr21:36420865[C",
"variantType": "translocation_breakend",
"cosmicGeneFusions": [
{
"id": "COSF2245",
"numSamples": 249,
"geneSymbols": [
"ETV6",
"RUNX1"
],
"hgvsr": "ENST00000396373.4(ETV6):r.1_1283::ENST00000300305.3(RUNX1):r.504_6222",
"histologies": [
{
"name": "acute lymphoblastic B cell leukaemia",
"numSamples": 169
},
{
"name": "acute lymphoblastic leukaemia",
"numSamples": 80
}
],
"sites": [
{
"name": "haematopoietic and lymphoid tissue",
"numSamples": 249
}
],
"pubMedIds": [
7761424,
7780150,
8609706,
8751464,
8982044,
9067587,
9207408,
9226156,
9628428,
10463610,
10774753,
11091202,
12621238,
12661004,
12750722,
15104290,
15642392,
24557455,
26925663
]
}
],
"fusionCatcher": [
{
"genes": {
"first": {
"hgnc": "ETV6",
"isOncogene": true
},
"second": {
"hgnc": "RUNX1",
"isOncogene": true
}
},
"somaticSources": [
"DepMap CCLE",
"Cancer Genome Project",
"ChimerKB 4.0",
"ChimerPub 4.0",
"ChimerSeq 4.0",
"Known",
"Mitelman DB",
"OncoKB",
"TICdb"
]
}
],
"transcripts": [
{
"transcript": "ENST00000396373.4",
"source": "Ensembl",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "ENSG00000139083",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusions": [
{
"transcript": "ENST00000437180.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000437180.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000300305.3",
"bioType": "protein_coding",
"intron": 1,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000300305.3(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000482318.1",
"bioType": "nonsense_mediated_decay",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000482318.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000486278.2",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000486278.2(RUNX1):r.?_-15+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000455571.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000455571.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000475045.2",
"bioType": "protein_coding",
"intron": 11,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000475045.2(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000416754.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000416754.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
}
],
"isCanonical": true,
"proteinId": "ENSP00000379658.3"
},
{
"transcript": "NM_001987.4",
"source": "RefSeq",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "2120",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusions": [
{
"transcript": "NM_001754.4",
"bioType": "protein_coding",
"intron": 2,
"geneId": "861",
"hgnc": "RUNX1",
"hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"
}
],
"isCanonical": true,
"proteinId": "NP_001978.1"
}
]
}
]
}
FieldTypeNotes
transcriptstringtranscript ID
bioTypestringdescriptions of the biotypes from Ensembl
exonintexon that contained fusion breakpoint
intronintintron that contained fusion breakpoint
geneIdstringgene ID. e.g. ENSG00000116062
hgncstringgene symbol. e.g. MSH6
hgvsrstringHGVS RNA nomenclature

Gene Fusion Data Sources

To provide more context to our gene fusions, we provide the following gene fusion data sources:

Consequences

When a gene fusion is identified, we add the following Sequence Ontology consequence:

              "consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],

Gene Fusions Section

The geneFusions section is contained within the object of the originating transcript. It will contain all the pairwise gene fusions that obey the criteria outlined above. In the case of ENST00000396373.4, there are 7 other Ensembl transcripts that would produce a gene fusion. For NM_001987.4, there was only one transcript (NM_001754.4) that would produce a gene fusion.

For each originating transcript, we report the following for each partner transcript:

  • transcript ID
  • gene ID
  • HGNC gene symbol
  • transcript bio type (e.g. protein_coding)
  • intron or exon number containing the breakpoint
  • HGVS RNA notation
tip

Before Nirvana 3.15, we provided HGVS coding notation. However, HGVS r. notation is more appropriate for these types of fusion splicing events (see HGVS SVD-WG007).

          "geneFusions": [
{
"transcript": "NM_001754.4",
"bioType": "protein_coding",
"intron": 2,
"geneId": "861",
"hgnc": "RUNX1",
"hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"
}
],

The HGVS RNA notation above indicates that the gene fusion starts with NM_001754.4 (RUNX1) until CDS position 58 and continues with NM_001987.4 (ETV6). 1009+3367 indicates that the fusion occurred 3367 bp within intron 2.

- - - - \ No newline at end of file diff --git a/3.16/core-functionality/mnv-recomposition/index.html b/3.16/core-functionality/mnv-recomposition/index.html deleted file mode 100644 index 259c389c..00000000 --- a/3.16/core-functionality/mnv-recomposition/index.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - -MNV Recomposition | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

MNV Recomposition

Overview

Most annotation tools handle variants independently. The problem with this approach is that nearby variants could affect the same codon leading to a very different annotation. For instance, consider the following example (Danecek, 2017):

When handled independently, the two variants (C→T & G→A) would be annotated as missense annotations. However, if we consider them together, the resulting MNV would yield a stop gain.

By default, Nirvana identifies these types of cases where two or more SNVs would affect the same codon. In addition, it's able to perform this operation on VCFs containing large numbers of samples (we've tested this on 2,500+ samples using the 1000 Genomes Project VCF files).

Publication

Petr Danecek, Shane A McCarthy, BCFtools/csq: haplotype-aware variant consequences, Bioinformatics, Volume 33, Issue 13, 1 July 2017, Pages 2037–2039

Supported variant types

At the moment, Nirvana only supports recomposing multiple SNVs into an MNV. The Danecek paper makes a compelling case for supporting frameshifting variants paired with frame-restoring variants. We've also received requests for supporting the recomposition of an SNV with insertions and deletions. While this is something we've looked into, it represents functionality that many of our clinical customers are not yet comfortable with.

Criteria

Nirvana will recompose a set of SNVs if two or more SNVs are located in the same codon for any codon in any of the overlapping transcripts.

The following criteria must also be met for at least one sample:

  1. Genotypes are provided for the VCF variants and all variants are in phase or homozygous variant.
  2. All the available phase set IDs are the same (homozygous variants are available to all phase sets)
  3. The genotype ploidy for all the variants is the same.
  4. No unsupported variant type (i.e. insertion or deletion) overlaps the recomposed variants
  5. The first and last base in at least one of the recomposed alleles must be non-reference.

Examples

During variant recomposition, if two SNVs affect the same codon, it becomes the seed codon. If there are SNVs in the adjacent codons, they will be aggregated into the seed codon.

  • Three SNVs in two adjacent codons. The recomposed alternate allele is ATAG: -

  • Three SNVs in two adjacent codons (larger distance). The recomposed alternate allele is ATATCC: -

  • Nirvana can use multiple reading frames to aggregate the seed codon. In this example, the seed codon is highlighted in green. If we look at reading frame 1, we see that the T→A variant occurs in the ACT codon. The adjacent codon to the left also has a variant C→T. As a result, there can be up to four bases between SNVs when aggregating the flanking codons. The recomposed alternate allele is TTCACATAGCACTCAC: -

  • Nothing will be recomposed if there's no seed codon: -

Multiple Samples

Recomposing variants while handling multiple samples can be complex. The recomposition criteria described above often lead to sample-specific recomposed variants. Here we show the recomposition of three variants with sample-specific criteria marked in bold:

POSREFALTSample 1Sample 2Sample 3
Decomposed Variant 1100AC0|10|11|1
Decomposed Variant 2101CG0/11|10|0
Decomposed Variant 3102TA1|1.0|1
Recomposed Variant 1100ACAG, CG.1|2.
Recomposed Variant 2100ACTCCT, CCA..1|2

In the example above, the heterozygous genotype in sample 1 at position 101 would prevent the MNVs from being recomposed. Similarly, the unknown genotype for sample 2 at position 102 would produce a smaller MNV than the one expressed for sample 3.

Phase Sets

Homozygous variants, same phase set

Recomposed phase set becomes . since homozygous variants belong to all phase sets.

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT1|1567
Decomposed Variant 2101CG1|1567
Recomposed Variant100ACTG1|1.

Mixing phased and unphased variants

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT0|1567
Decomposed Variant 2101CG1/1.
Recomposed Variant100ACAG,TG1|2567

Variants in different phase sets

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT0|1567
Decomposed Variant 2101CG1|1890
Recomposed Variant100ACAG,TG1|2.

Unphased homozygous variants

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT1/1.
Decomposed Variant 2101CG1/1.
Recomposed Variant100ACTG1/1.

Homozygous variants are not commutative

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT0|1567
Decomposed Variant 2101CG1|1567
Decomposed Variant 3102GT0|1890

In this example, the homozygous variant at position 101 cannot bridge the gap between the other two variants since there could be a switching error between phase sets 567 & 890. As a result, we have to create two overlapping MNVs:

POSREFALTGenotypePhase Set
Recomposed Variant 1100ACAG, TG1|2567
Recomposed Variant 2101CGGG, GT1|2890

Conflicting Genotypes

JSON Output

Given the following VCF entries:

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  S1  S2  S3
chr1 12861477 . T C . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477
chr1 12861478 . G A . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477

Each original variant would be annotated as usual. The difference is that both will now have an isDecomposedVariant flag set to true in addition to an entry in the linkedVids field that points to the new MNV:

{
"chromosome":"chr1",
"position":12861477,
"refAllele":"T",
"altAlleles":[
"C"
],
"filters":[
"PASS"
],
"samples":[
{
"genotype":"0/0",
},
{
"genotype":"0/0",
},
{
"genotype":"0|1",
}
],
"variants":[
{
"vid":"1-12861477-T-C",
"chromosome":"chr1",
"begin":12861477,
"end":12861477,
"refAllele":"T",
"altAllele":"C",
"variantType":"SNV",
"isDecomposedVariant":true,
"linkedVids":[
"1-12861477-TG-CA"
],
"hgvsg":"NC_000001.11:g.12861477T>C",
"transcripts":[ ... ]
}
]
},
{
"chromosome":"chr1",
"position":12861478,
"refAllele":"G",
"altAlleles":[
"A"
],
"filters":[
"PASS"
],
"samples":[
{
"genotype":"0/0",
},
{
"genotype":"0/0",
},
{
"genotype":"0|1",
}
],
"variants":[
{
"vid":"1-12861478-G-A",
"chromosome":"chr1",
"begin":12861478,
"end":12861478,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"isDecomposedVariant":true,
"linkedVids":[
"1-12861477-TG-CA"
],
"hgvsg":"NC_000001.11:g.12861478G>A",
"transcripts":[ ... ]
}
]
}

The recomposed variant gets a separate entry where the isRecomposedVariant flag is set to true and the linkedVids field links to the constituent SNVs:

{
"chromosome":"chr1",
"position":12861477,
"refAllele":"TG",
"altAlleles":[
"CA"
],
"filters":[
"PASS"
],
"samples":[
{
"genotype":"0/0",
},
{
"genotype":"0/0",
},
{
"genotype":"0|1",
}
],
"variants":[
{
"vid":"1-12861477-TG-CA",
"chromosome":"chr1",
"begin":12861477,
"end":12861478,
"refAllele":"TG",
"altAllele":"CA",
"variantType":"MNV",
"isRecomposedVariant":true,
"linkedVids":[
"1-12861477-T-C",
"1-12861478-G-A"
],
"hgvsg":"NC_000001.11:g.12861477_12861478inv",
"transcripts":[ ... ]
}
]
}
Recomposed QUAL, FILTER, and GQ

Although the example above does not demonstrate it, Nirvana tries to set the quality score, filter, and genotype quality (GQ) for the recomposed variant. The QUAL score is calculated to be the minimum QUAL score for all the constituent SNVs. The same method is used for the genotype quality (GQ) scores. For the filters field, PASS will be used if all constituent variants passed their filters, otherwise we set it to FilteredVariantsRecomposed.

- - - - \ No newline at end of file diff --git a/3.16/core-functionality/variant-ids/index.html b/3.16/core-functionality/variant-ids/index.html deleted file mode 100644 index d5e1fdd3..00000000 --- a/3.16/core-functionality/variant-ids/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Variant IDs | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Variant IDs

Overview

Many downstream tools use a variant identifier to store annotation results. We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute.

The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. Our VID scheme attempts to fill that gap.

Conventions
  • all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)
  • for a reference variant (i.e. no alt allele), replace the period (.) with the reference base
  • padding bases are used; neither the reference nor the alternate allele can be empty
  • some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base

Small Variants

VCF Examples

chr1    66507   .   T   A   184.45  PASS    .
chr1 66521 . T TATATA 144.53 PASS .
chr1 66572 . GTA G,GTACTATATATTATA 45.45 PASS .

Format

chromosome-position-reference allele-alternate allele

VID Examples

  • 1-66507-T-A
  • 1-66521-T-TATATA
  • 1-66572-GTA-G
  • 1-66572-G-GTACTATATATTA

Translocation Breakends

VCF Example

chr1    2617277 .   A   AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[  .   PASS    SVTYPE=BND

Format

chromosome-position-reference allele-alternate allele

VID Example

  • 1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[

All Other Structural Variants

VCF Examples

chr1    1000    .   G   <ROH>   .   PASS    END=3001000;SVTYPE=ROH
chr1 1350082 . G <DEL> . PASS END=1351320;SVTYPE=DEL
chr1 1477854 . C <DUP:TANDEM> . PASS END=1477984;SVTYPE=DUP
chr1 1477968 . T <INS> . PASS END=1477968;SVTYPE=INS
chr1 1715898 . N <DUP> . PASS SVTYPE=CNV;END=1750149
chr1 2650426 . N <DEL> . PASS SVTYPE=CNV;END=2653074
chr2 321682 . T <INV> . PASS SVTYPE=INV;END=421681
chr20 2633403 . G <STR2> . PASS END=2633421

Format

chromosome-position-end position-reference allele-alternate allele-SVTYPE

VID Examples

  • 1-1000-3001000-G-<ROH>-ROH
  • 1-1350082-1351320-G-<DEL>-DEL
  • 1-1477854-1477984-C-<DUP:TANDEM>-DUP
  • 1-1477968-1477968-T-<INS>-INS
  • 1-1715898-1750149-A-<DUP>-CNV (replace the N with A)
  • 1-2650426-2653074-N-<DEL>-CNV (keep the N)
  • 2-321682-421681-T-<INV>-INV
  • 20-2633403-2633421-G-<STR2>-STR
- - - - \ No newline at end of file diff --git a/3.16/data-sources/1000Genomes-snv-json/index.html b/3.16/data-sources/1000Genomes-snv-json/index.html deleted file mode 100644 index f5336304..00000000 --- a/3.16/data-sources/1000Genomes-snv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-snv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

1000Genomes-snv-json

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.
- - - - \ No newline at end of file diff --git a/3.16/data-sources/1000Genomes-sv-json/index.html b/3.16/data-sources/1000Genomes-sv-json/index.html deleted file mode 100644 index 8c4ce2b1..00000000 --- a/3.16/data-sources/1000Genomes-sv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-sv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

1000Genomes-sv-json

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
| Field | Type | Notes |
|:--|:--|:--|
| chromosome | string | |
| begin | integer | |
| end | integer | |
| variantType | string | |
| id | string | |
| allAn | integer | allele number for all populations. Non-zero integer. |
| allAc | integer | allele count for all populations. Integer. |
| allAf | floating point | allele frequency for all populations. Range: 0 - 1.0 |
| afrAf | floating point | allele frequency for the African super population. Range: 0 - 1.0 |
| amrAf | floating point | allele frequency for the Ad Mixed American super population. Range: 0 - 1.0 |
| eurAf | floating point | allele frequency for the European super population. Range: 0 - 1.0 |
| easAf | floating point | allele frequency for the East Asian super population. Range: 0 - 1.0 |
| sasAf | floating point | allele frequency for the South Asian super population. Range: 0 - 1.0 |
| reciprocalOverlap | floating point | range: 0 - 1. |
- - - - \ No newline at end of file diff --git a/3.16/data-sources/1000Genomes/index.html b/3.16/data-sources/1000Genomes/index.html deleted file mode 100644 index 803106a9..00000000 --- a/3.16/data-sources/1000Genomes/index.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - -1000 Genomes | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

1000 Genomes

Overview

The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases.

Publication

Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. Nature 526, 75–81 (2015). https://doi.org/10.1038/nature15394

Populations

Small Variants

VCF File Parsing

The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. Our team converted the original data to VCF entries with allele counts and allele numbers like the following.

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633

The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored).

We parse the VCF file and extract the following fields from INFO:

  • AA
  • AC
  • AN
  • EAS_AN
  • AMR_AN
  • AFR_AN
  • EUR_AN
  • SAS_AN
  • EAS_AC
  • AMR_AC
  • AFR_AC
  • EUR_AC
  • SAS_AC

Conflict Resolution

We have observed conflicting allele frequency information in the source. Take the following example:

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;
1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;

That is, the variant 1-20505705-C-CTG has conflicting entries. To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX.

| Chromosome | # of alleles | # of conflicting alleles | percentage |
|:--|--:|--:|--:|
| chrX | 834800 | 2733 | 0.33% |
| Total | 21413098 | 2743 | 0.013% |

Currently, we remove the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep the allele frequencies of all other alleles in the VCF line.

Potential Alternate Solutions

  • Remove all alleles contained in VCF lines that have a conflicting allele. (Recommended by Holly Zheng-Bradley of the 1000 Genomes group, 7/29/2015)
  • Recalculate the allele frequency for the conflicting allele.
  • Pick the allele frequency that has the highest data support.

Download URL

GRCh37 -GRCh38

JSON Output

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.

Structural Variants

VCF File Parsing

The VCF files contain entries like the following:

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A <CN0>,<CN2>,<CN3>,<CN4> 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4

Please note that CNVs are allele-specific. For example, HG00096 is effectively copy number 4, which would be a net gain on chr22.

1000 Genomes contains 5 types of structural variants:

  • CNV
  • DEL
  • DUP
  • INS
  • INV

Since the 1000 Genomes data is provided in VCF format, we assume that the coordinates follow the VCF convention, i.e., there is a padding base for symbolic alleles. So all intervals can be interpreted as [BEGIN+1, END]. Similarly, for all variant types except insertions, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below.

Insertion issues

  • END = BEGIN for 6/165
  • END = BEGIN+2 for 93/165
  • END = BEGIN+3 for 11/165
  • END = BEGIN+4 for 11/165
  • END – BEGIN ranges from 5 to 1156 for the others.

Converting VCF svTypes to SO sequence alterations

The svType will be captured in our JSON file under the sequenceAlteration key. Here's the translation we'll use according to svType in 1000 Genomes.

| svType | Alternative Alleles contain <CN*> | sequenceAlteration |
|:--|:--|:--|
| ALU | FALSE | mobile_element_insertion |
| DUP | TRUE | copy_number_gain |
| CNV | TRUE | copy_number_gain (observed_gains > 0 and observed_losses = 0); copy_number_loss (observed_gains = 0 and observed_losses > 0); copy_number_variation (otherwise) |
| DEL | TRUE | copy_number_loss |
| LINE1 | FALSE | mobile_element_insertion |
| SVA | FALSE | mobile_element_insertion |
| INV | FALSE | inversion |
| INS | FALSE | insertion |

Exceptions

We discard structural variants without END

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
21 9495848 esv3646347 A <INS:ME:LINE1> 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0

CNVs in chrY

  • No other types of structural variants exist in chrY
  • Since the copy number is provided in the genotype field, we parse it directly from the "CN" field.
  • For most CNVs in chrY, the reference copy number is 1, but the reference copy number for CNVs in segmental duplication sites is 2 (<CN2> in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00101 HG00103 HG00105 HG00107 HG00108
Y 2888555 CNV_Y_2888555_3014661 T <CN2> 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394
Y 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C <CN1>,<CN3> 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99

JSON Output

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
| Field | Type | Notes |
|:--|:--|:--|
| chromosome | string | |
| begin | integer | |
| end | integer | |
| variantType | string | |
| id | string | |
| allAn | integer | allele number for all populations. Non-zero integer. |
| allAc | integer | allele count for all populations. Integer. |
| allAf | floating point | allele frequency for all populations. Range: 0 - 1.0 |
| afrAf | floating point | allele frequency for the African super population. Range: 0 - 1.0 |
| amrAf | floating point | allele frequency for the Ad Mixed American super population. Range: 0 - 1.0 |
| eurAf | floating point | allele frequency for the European super population. Range: 0 - 1.0 |
| easAf | floating point | allele frequency for the East Asian super population. Range: 0 - 1.0 |
| sasAf | floating point | allele frequency for the South Asian super population. Range: 0 - 1.0 |
| reciprocalOverlap | floating point | range: 0 - 1. |
- - - - \ No newline at end of file diff --git a/3.16/data-sources/amino-acid-conservation-json/index.html b/3.16/data-sources/amino-acid-conservation-json/index.html deleted file mode 100644 index 145382d6..00000000 --- a/3.16/data-sources/amino-acid-conservation-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -amino-acid-conservation-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

amino-acid-conservation-json

"aminoAcidConservation": {
"scores": [0.34]
}
FieldTypeNotes
aminoAcidConservationobject
scoresobject array of doublespercent conserved with respect to human amino acid residue. Range: 0.01 - 1.00
- - - - \ No newline at end of file diff --git a/3.16/data-sources/amino-acid-conservation/index.html b/3.16/data-sources/amino-acid-conservation/index.html deleted file mode 100644 index dc2ebd49..00000000 --- a/3.16/data-sources/amino-acid-conservation/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -Amino Acid Conservation | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Amino Acid Conservation

Overview

Amino acid conservation scores are obtained from multiple alignments of vertebrate exomes to the human ones. The scores indicate the frequency with which a particular AA is observed in Humans.

Publication

Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. Genome Res. 2005 Aug;15(8):1034-50. (http://www.genome.org/cgi/doi/10.1101/gr.3715005)

FASTA File

The exon alignments are provided in FASTA files as follows:

>ENST00000641515.2_hg38_1_2 3 0 0 chr1:65565-65573+
MKK
>ENST00000641515.2_panTro4_1_2 3 0 0 chrUn_GL393541:146907-146915+
MKK
>ENST00000641515.2_gorGor3_1_2 3 0 0
---
>ENST00000641515.2_ponAbe2_1_2 3 0 0 chr15:99141417-99141425-
MKK
>ENST00000641515.2_hg38_2_2 324 0 0 chr1:69037-70008+
VTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ
>ENST00000641515.2_panTro4_2_2 324 0 0 chrUn_GL393541:151333-152303+

Parsing FASTA

For each Ensembl transcript, we will need to aggregate all the exons together for each of the 100 species. From there, we should get a full alignment that can be used to determine conservation. For example, for ENST00000641515.2 we have:

Human (hg38) MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Chimp MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFL-MLFFVFYGGIVFGNLLIVRIVVSDSHLHSPMYFLLANLSLIDLSLCSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Gorilla ----------------------------------------------------------------------------------------------------------------------
Orangutan MKKVTAEAISWNESTSKTNNSVVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVIIVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Gibbon ----------------------------------------------------------------------------------------------------------------------
Rhesus MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVVDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL
Macaque MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVIDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL

If we look at position 6, we see that humans have an Alanine (A) residue. This residue is shared by Chimp and Orangutan. However, Rhesus and Macaque have a Glutamic acid (E) residue at that position. Moreover, Gorilla and Gibbon don't even have data for that transcript. For position 6, we would say that we have 43% conservation (3/7) since three organisms share the same residue as humans.

Assigning scores to Nirvana transcripts

The source FASTA file comes with Ensembl/UCSC transcript ids of the transcripts used for alignments. The Nirvana cache has RefSeq and Ensembl transcripts and our first attempt was to map the given Ensembl/UCSC ids to their equivalent RefSeq/Ensembl ids. This attempt was unsuccessful since UCSC Table Browser provided mapping without version numbers. So we proceeded as follows:

  • Take proteins which have a unique mapping (and hence one set of conservation scores). For ones that mapped to both ChrX and ChrY, we accepted the one from ChrX.
  • A Nirvana transcript having an exact peptide sequence match with a uniquely aligned protein is assigned the corresponding conservation scores.

Unfortunately this left us with a very small number of transcripts having conservation scores.

GRCh37

  • Source FASTA contained 41957 protein alignments.
  • 38165 proteins had unique scores.
  • 88 aligned proteins existed in Nirvana cache.
  • 118 transcripts had conservation scores.

GRCh38

  • Source FASTA contained 110024 protein alignments.
  • 88961 proteins had unique scores.
  • 11688 aligned proteins existed in Nirvana cache.
  • 12098 transcripts had conservation scores.

Download URL

GRCh37: http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz

GRCh38: http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz

JSON Output

Conservation scores are reported in the transcript section. One score is reported for each alt allele.

"aminoAcidConservation": {
"scores": [0.34]
}
FieldTypeNotes
aminoAcidConservationobject
scoresobject array of doublespercent conserved with respect to human amino acid residue. Range: 0.01 - 1.00
- - - - \ No newline at end of file diff --git a/3.16/data-sources/clingen-dosage-json/index.html b/3.16/data-sources/clingen-dosage-json/index.html deleted file mode 100644 index 4f936eea..00000000 --- a/3.16/data-sources/clingen-dosage-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-dosage-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

clingen-dosage-json

"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
| Field | Type | Notes |
|:--|:--|:--|
| clingenDosageSensitivityMap | object array | |
| chromosome | string | Ensembl-style chromosome names |
| begin | integer | 1-based position |
| end | integer | 1-based position |
| haploinsufficiency | string | see possible values below |
| triplosensitivity | string | (same as haploinsufficiency) |
| reciprocalOverlap | floating point | Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions). |
| annotationOverlap | floating point | Range: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap. Specified up to 5 decimal places (Not reported for Insertions). |

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely
- - - - \ No newline at end of file diff --git a/3.16/data-sources/clingen-gene-validity-json/index.html b/3.16/data-sources/clingen-gene-validity-json/index.html deleted file mode 100644 index 2806f637..00000000 --- a/3.16/data-sources/clingen-gene-validity-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-gene-validity-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

clingen-gene-validity-json

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
FieldTypeNotes
clingenGeneValidityobject
diseaseIdstringMonarch Disease Ontology ID (MONDO)
diseasestringdisease label
classificationstringsee below for possible values
classificationDatestringyyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
- - - - \ No newline at end of file diff --git a/3.16/data-sources/clingen-json/index.html b/3.16/data-sources/clingen-json/index.html deleted file mode 100644 index 100b5c0a..00000000 --- a/3.16/data-sources/clingen-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

clingen-json

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
| Field | Type | Notes |
|:--|:--|:--|
| clingen | object array | |
| chromosome | string | Ensembl-style chromosome names |
| begin | integer | 1-based position |
| end | integer | 1-based position |
| variantType | string | Any of the sequence alterations defined here. |
| id | string | Identifier from the data source. Alternatively a VID |
| clinicalInterpretation | string | see possible values below |
| observedGains | integer | Range: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain. |
| observedLosses | integer | Range: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain. |
| validated | boolean | |
| phenotypes | string array | Description of the phenotype. |
| phenotypeIds | string array | Description of the phenotype IDs. |
| reciprocalOverlap | floating point | Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions). |

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain
- - - - \ No newline at end of file diff --git a/3.16/data-sources/clingen/index.html b/3.16/data-sources/clingen/index.html deleted file mode 100644 index 2ba769ff..00000000 --- a/3.16/data-sources/clingen/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -ClinGen | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

ClinGen

Overview

ClinGen is a National Institutes of Health (NIH)-funded resource dedicated to building a central resource that defines the clinical relevance of genes and variants for use in precision medicine and research.

Publication

Heidi L. Rehm, Ph.D., Jonathan S. Berg, M.D., Ph.D., Lisa D. Brooks, Ph.D., Carlos D. Bustamante, Ph.D., James P. Evans, M.D., Ph.D., Melissa J. Landrum, Ph.D., David H. Ledbetter, Ph.D., Donna R. Maglott, Ph.D., Christa Lese Martin, Ph.D., Robert L. Nussbaum, M.D., Sharon E. Plon, M.D., Ph.D., Erin M. Ramos, Ph.D., Stephen T. Sherry, Ph.D., and Michael S. Watson, Ph.D., for ClinGen. ClinGen The Clinical Genome Resource. N Engl J Med 2015; 372:2235-2242 June 4, 2015 DOI: 10.1056/NEJMsr1406261.

ISCA Regions

TSV Extraction

ClinGen contains only copy number variants. Since the coordinates in the original ClinGen file follow the same convention as the BED format, the coordinates had to be adjusted to [BEGIN+1, END].

#bin    chrom   chromStart      chromEnd        name    score   strand  thickStart      thickEnd        attrCount       attrTags        attrVals
nsv530705 1 564405 8597804 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv530706 1 564424 3262790 0 1 copy_number_loss pathogenic False Abnormal facial shape,Abnormality of cardiac morphology,Global developmental delay,Muscular hypotonia HP:0001252,HP:0001263,HP:0001627,HP:0001999,MedGen:CN001147,MedGen:CN001157,MedGen:CN001482,MedGen:CN001810
nsv530707 1 564424 7068738 0 1 copy_number_loss pathogenic False Abnormality of cardiac morphology,Cleft upper lip,Failure to thrive,Global developmental delay,Intrauterine growth retardation,Microcephaly,Short stature HP:0000204,HP:0000252,HP:0001263,HP:0001508,HP:0001511,HP:0001627,HP:0004322,MedGen:C0349588,MedGen:C1845868,MedGen:C1853481,MedGen:C2364119,MedGen:CN000197,MedGen:CN001157,MedGen:CN001482
nsv533512 1 564435 649748 0 1 copy_number_loss benign False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv931338 1 714078 4958499 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv530300 1 728138 5066371 1 0 copy_number_gain pathogenic False Abnormality of cardiac morphology,Cleft palate,Global developmental delay HP:0000175,HP:0001263,HP:0001627,MedGen:C2240378,MedGen:CN001157,MedGen:CN001482

Status levels

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain

Parsing

We parse the ClinGen tsv file and extract the following:

  • chrom
  • chromStart (note this is a 0-based coordinate)
  • chromEnd
  • attrTags
  • attrVals

attrTags and attrVals are comma separated lists. attrTags contains the field keys and attrVals contains the field values. We will parse the following keys from the two fields:

  • parent (this will be used as the ID in our JSON output)
  • clinical_int
  • validated
  • phenotype (this should be a string array)
  • phenotype_id (this should be a string array)

Observed losses and observed gains will be calculated from entries that share a common parent ID.

  • variants with a common parent ID and same coordinates are grouped
    • calculated observed losses, observed gains for each group
    • Clinical significance and validation status are collapsed using the priority strategy described below
  • Variants with the same parent ID can have different coordinates (mapped to hg38)
    • nsv491508 : chr14:105583663-106881350 and chr14:105605043-106766076 (only one example)
    • we kept both variants

Conflict Resolution

Clinical significance priority

When there is a mixture of variants belonging to the same parent ID, we will choose the most pathogenic clinical significance from the available values. For example, if 3 samples were deemed pathogenic and 2 samples were likely pathogenic, we would list the variant as pathogenic.

Priority (high to low)

  • Priority
  • Pathogenic
  • Likely pathogenic
  • Benign
  • Likely benign
  • Uncertain significance

Validation Priority

When there is a mixture of variants belonging to the same parent ID, we will set the validation status to true if any of the variants were validated.

Download URL

https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite

JSON Output

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
FieldTypeNotes
clingenobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
variantTypestringAny of the sequence alterations defined here.
idstringIdentifier from the data source. Alternatively a VID
clinicalInterpretationstringsee possible values below
observedGainsintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
observedLossesintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
validatedboolean
phenotypesstring arrayDescription of the phenotype.
phenotypeIdsstring arrayDescription of the phenotype IDs.
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain

Dosage Sensitivity Map

The Clinical Genome Resource (ClinGen) consortium is curating genes and regions of the genome to assess whether there is evidence to support that these genes/regions are dosage sensitive and should be targeted on a cytogenomic array. Nirvana reports these annotations for overlapping SVs.

Publication

Riggs ER, Nelson T, Merz A, Ackley T, Bunke B, Collins CD, Collinson MN, Fan YS, Goodenberger ML, Golden DM, Haglund-Hazy L, Krgovic D, Lamb AN, Lewis Z, Li G, Liu Y, Meck J, Neufeld-Kaiser W, Runke CK, Sanmann JN, Stavropoulos DJ, Strong E, Su M, Tayeh MK, Kokalj Vokac N, Thorland EC, Andersen E, Martin CL. Copy number variant discrepancy resolution using the ClinGen dosage sensitivity map results in updated clinical interpretations in ClinVar. Hum Mutat. 2018 Nov;39(11):1650-1659. doi: 10.1002/humu.23610. PMID: 30095202; PMCID: PMC7374944.

TSV Source files

Regions

#ClinGen Region Curation Results
#07 May,2019
#Genomic Locations are reported on GRCh38 (hg38): GCF_000001405.36
#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen
#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_region.cgi?id=key
#ISCA ID ISCA Region Name cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID
ISCA-46299 Xp11.22 region (includes HUWE1) Xp11.22 tbd 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 22840365 20655035 26692240 2018-11-19
ISCA-46295 15q13.3 recurrent region (D-CHRNA7 to BP5) (includes CHRNA7 and OTUD7A) 15q13.3 chr15:31727418-32153204 3 Sufficient evidence for dosage pathogenicity 19898479 20236110 22775350 40 Dosage sensitivity unlikely 26968334 22420048 2018-05-10
ISCA-46291 7q11.23 recurrent distal region (includes HIP1, YWHAG) 7q11.23 chr7:75528718-76433859 2 Some evidence for dosage pathogenicity 21109226 16971481 1 Little evidence for dosage pathogenicity 21109226 27867344 2018-12-31
ISCA-46290 Xp11.22p11.23 recurrent region (includes SHROOM4) Xp11.22-p11.23 chrX: 48447780-52444264 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 19716111 21418194 25425167 2017-12-14 300801

Genes

#ClinGen Gene Curation Results
#24 May,2019
#Genomic Locations are reported on GRCh37 (hg19): GCF_000001405.13
#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen
#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_gene.cgi?sym=Gene Symbol
#Gene Symbol Gene ID cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID
A4GALT 53947 22q13.2 chr22:43088121-43117307 30 Gene associated with autosomal recessive phenotype 0 No evidence available 2014-12-11 111400
AAGAB 79719 15q23 chr15:67493013-67547536 3 Sufficient evidence for dosage pathogenicity 23064416 23000146 0 No evidence available 2013-02-28 148600

Dosage Rating System

RatingPossible Clinical Interpretation
0No evidence to suggest that dosage sensitivity is associated with clinical phenotype
1Little evidence suggesting dosage sensitivity is associated with clinical phenotype
2Emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
3Sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
30Gene associated with autosomal recessive phenotype
40Dosage sensitivity unlikely

Reference: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml

Download URL

ftp://ftp.clinicalgenome.org/

JSON Output

"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
FieldTypeNotes
clingenDosageSensitivityMapobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
haploinsufficiencystringsee possible values below
triplosensitivitystring(same as haploinsufficiency) 
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).
annotationOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap. Specified up to 5 decimal places (Not reported for Insertions).

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely

Gene-Disease Validity

The ClinGen Gene-Disease Clinical Validity curation process involves evaluating the strength of evidence supporting or refuting a claim that variation in a particular gene causes a particular disease. Nirvana reports these annotations for genes in the genes section of the JSON.

Publication

Strande NT, Riggs ER, Buchanan AH, et al. Evaluating the Clinical Validity of Gene-Disease Associations: An Evidence-Based Framework Developed by the Clinical Genome Resource. Am J Hum Genet. 2017;100(6):895-906. doi:10.1016/j.ajhg.2017.04.015

Source TSV

The source data comes in a CSV file that we convert to a TSV as follows:

CLINGEN GENE VALIDITY CURATIONS
FILE CREATED: 2019-05-28
WEBPAGE: https://search.clinicalgenome.org/kb/gene-validity
+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++
GENE SYMBOL,GENE ID (HGNC),DISEASE LABEL,DISEASE ID (MONDO),SOP,CLASSIFICATION,ONLINE REPORT,CLASSIFICATION DATE
+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++
A2ML1,HGNC:23336,Noonan syndrome with multiple lentigines,MONDO_0007893,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/59b87033-dd91-4f1e-aec1-c9b1f5124b16--2018-06-07T14:37:47,2018-06-07T14:37:47.175Z
A2ML1,HGNC:23336,cardiofaciocutaneous syndrome,MONDO_0015280,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/fc3c41d8-8497-489b-a350-c9e30016bc6a--2018-06-07T14:31:03,2018-06-07T14:31:03.696Z
A2ML1,HGNC:23336,Costello syndrome,MONDO_0009026,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/ea72ba8d-cf62-44bc-86be-da64e3848eba--2018-06-07T14:34:05,2018-06-07T14:34:05.324Z

Download URL

https://search.clinicalgenome.org/kb/gene-validity.csv

Conflict Resolution

Multiple Classifications

Here is an example of multiple classifications.

$ grep MONDO_0010192 ClinGen-Gene-Disease-Summary-2019-12-02.csv  | grep EDNRB
EDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Moderate,https://search.clinicalgenome.org/kb/gene-validity/d7abbd45-7915-437b-849b-dea876bfc2f5--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z
EDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Limited,https://search.clinicalgenome.org/kb/gene-validity/73ee9727-60c1-40fd-830f-08c2b513d2ee--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z

In such cases, we select the more severe classification.

Multiple Dates

$ grep MONDO_0016419 ClinGen-Gene-Disease-Summary-2019-12-02.csv  | grep MUTYH
MUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9904,2017-05-24T00:00:00
MUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9902,2017-05-25T00:00:00

If the classifications are the same, we should select the latest classification date.

JSON Output

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
FieldTypeNotes
clingenGeneValidityobject array
diseaseIdstringMonarch Disease Ontology ID (MONDO)
diseasestringdisease label
classificationstringsee below for possible values
classificationDatestringyyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
- - - - \ No newline at end of file diff --git a/3.16/data-sources/clinvar-json/index.html b/3.16/data-sources/clinvar-json/index.html deleted file mode 100644 index 8fd34938..00000000 --- a/3.16/data-sources/clinvar-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clinvar-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

clinvar-json

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity
- - - - \ No newline at end of file diff --git a/3.16/data-sources/clinvar/index.html b/3.16/data-sources/clinvar/index.html deleted file mode 100644 index a3e9cff1..00000000 --- a/3.16/data-sources/clinvar/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -ClinVar | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

ClinVar

Overview

ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation.

Publication

Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, Nucleic Acids Research, 46, Issue D1, 4 January 2018, Pages D1062–D1067, https://doi.org/10.1093/nar/gkx1153

RCV File

Example

Here's a full RCV entry.

Parsing

In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output.

ID

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinVarAccession Acc="RCV000000001" Version="2">
</ClinVarSet>

The Acc and Version fields are merged to form the ID (RCV000000001.2)

LastUpdatedDate

<ClinVarSet>
<ReferenceClinVarAssertion DateCreated="2012-08-13" DateLastUpdated="2016-02-17" ID="57604" >
</ClinVarSet>

Significance

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

ReviewStatus

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

Phenotypes

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="62">
<Trait Type="Disease">
<Name>
<ElementValue Type="Preferred">Joubert syndrome 9</ElementValue>
</Name>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

We only use the field with Type="Preferred". Multiple phenotypes may be reported

Location and Variant Id

<ReferenceClinVarAssertion>
<GenotypeSet Type="CompoundHeterozygote" ID="424709">
<MeasureSet Type="Variant" ID="81">
<Measure Type="single nucleotide variant" ID="15120">
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38"
AssemblyStatus="current" Chr="10" Accession="NC_000010.11" start="89222510"
stop="89222510" display_start="89222510" display_stop="89222510" variantLength="1"
positionVCF="89222510" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25"
AssemblyStatus="previous" Chr="10" Accession="NC_000010.10" start="90982267"
stop="90982267" display_start="90982267" display_stop="90982267" variantLength="1"
positionVCF="90982267" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
</Measure>
</MeasureSet>
</GenotypeSet>
</ReferenceClinVarAssertion>
  • The variant position is extracted from the fields for their respective assemblies.
  • Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant.
  • For older records, since the "start" and "stop" fields are not always available, we use the "display_start" and "display_stop" fields.
  • If a required allele is not available, we extract it from the reference sequence.
  • Only variants having a dbSNP id are extracted.
  • Note that a ClinVar accession may have multiple variants associated with it (possibly in different locations).
  • VariantId is extracted from the MeasureSet attributes.

MedGen, OMIM, Orphanet IDs

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="175">
<Trait ID="3036" Type="Disease">
<XRef ID="C0086651" DB="MedGen"/>
<XRef ID="309297" DB="Orphanet"/>
<XRef ID="582" DB="Orphanet"/>
<XRef Type="MIM" ID="253000" DB="OMIM"/>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

AlleleOrigins

<ClinVarAssertion>
<Origin>germline</Origin>
</ClinVarAssertion>

We extract all Allele Origins, but only from Submissions (SCV) entries.

PubMedIds

<ClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<Citation Type="general">
<ID Source="PubMed">12114475</ID>
</Citation>
</ClinicalSignificance>
<AttributeSet>
<Attribute Type="AssertionMethod">LMM Criteria</Attribute>
<Citation>
<ID Source="PubMed">24033266</ID>
</Citation>
</AttributeSet>
<ObservedIn>
<ObservedData ID="9727445">
<Citation Type="general">
<ID Source="PubMed">9113933</ID>
</Citation>
</ObservedData>
</ObservedIn>
<Citation Type="general">
<ID Source="PubMed">23757202</ID>
</Citation>
</ClinVarAssertion>

We extract all PubMed IDs, but only from Submissions (SCV) entries.

Parsing Significance

Extracting significance(s) may involve parsing multiple fields. Take the following snippets into consideration.

<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2016-10-13">
<ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus>
<Description>Pathogenic/Likely pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2012-06-07">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Conflicting interpretations of pathogenicity</Description>
<Explanation DataSource="ClinVar" Type="public">Pathogenic(1);Uncertain significance(1)</Explanation>
</ClinicalSignificance>

Given the evidence, we converted the significance field into an array of strings which may be parsed out of the Descriptions or Explanation fields.

Varying Delimiters

The delimiters in each field may vary. Currently, the delimiters for Description are , and /. The delimiters for Explanation are ; and /.

VCV File

Example

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<ClinVarVariationRelease xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd" ReleaseDate="2019-12-31">
<VariationArchive VariationID="431749" VariationName="GRCh37/hg19 1p36.31(chr1:6051187-6158763)" VariationType="copy number gain" DateCreated="2017-08-12" DateLastUpdated="2019-09-10" Accession="VCV000431749" Version="1" RecordType="included" NumberOfSubmissions="0" NumberOfSubmitters="0">
<RecordStatus>current</RecordStatus>
<Species>Homo sapiens</Species>
<IncludedRecord>
<SimpleAllele AlleleID="425239" VariationID="431749">
<GeneList>
<Gene Symbol="KCNAB2" FullName="potassium voltage-gated channel subfamily A regulatory beta subunit 2" GeneID="8514" HGNC_ID="HGNC:6229" Source="calculated" RelationshipType="genes overlapped by variant">
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="1" Accession="NC_000001.11" start="5992639" stop="6101186" display_start="5992639" display_stop="6101186" Strand="+"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="6052357" stop="6161252" display_start="6052357" display_stop="6161252" Strand="+"/>
</Location>
<OMIM>601142</OMIM>
</Gene>
<Gene Symbol="NPHP4" FullName="nephrocystin 4" GeneID="261734" HGNC_ID="HGNC:19104" Source="calculated" RelationshipType="genes overlapped by variant">
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="1" Accession="NC_000001.11" start="5862810" stop="5992425" display_start="5862810" display_stop="5992425" Strand="-"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="5922869" stop="6052532" display_start="5922869" display_stop="6052532" Strand="-"/>
</Location>
<OMIM>607215</OMIM>
</Gene>
</GeneList>
<Name>GRCh37/hg19 1p36.31(chr1:6051187-6158763)</Name>
<VariantType>copy number gain</VariantType>
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" forDisplay="true" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="6051187" stop="6158763" display_start="6051187" display_stop="6158763"/> </Location>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
<XRefList>
<XRef Type="Interpreted" ID="431733" DB="ClinVar"/>
</XRefList>
</SimpleAllele>
<ReviewStatus>no interpretation for the single variant</ReviewStatus>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
<SubmittedInterpretationList>
<SCV Title="SUB1895145" Accession="SCV000296057" Version="1"/>
</SubmittedInterpretationList>
<InterpretedVariationList>
<InterpretedVariation VariationID="431733" Accession="VCV000431733" Version="1"/>
</InterpretedVariationList>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

Parsing

In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output.

id

<VariationArchive VariationID="431749" VariationName="GRCh37/hg19 1p36.31(chr1:6051187-6158763)" VariationType="copy number gain" DateCreated="2017-08-12" DateLastUpdated="2019-09-10" Accession="VCV000431749" Version="1" RecordType="included" NumberOfSubmissions="0" NumberOfSubmitters="0">

The Accession and Version fields are merged to form the ID (VCV000431749.1)

significance

<ClinVarVariationRelease>
<VariationArchive>
<IncludedRecord>
<SimpleAllele>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
</SimpleAllele>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

May have multiple significances listed.

reviewStatus

<ClinVarVariationRelease>
<VariationArchive>
<IncludedRecord>
<ReviewStatus>no interpretation for the single variant</ReviewStatus>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

Known Issues

Known Issues
  • The XML file contains ~1k more entries (out of 162K) than the VCF file
  • The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF
  • The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H", etc.) as their alternate allele

Download URLs

ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz

https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz

JSON Output

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity

Building the supplementary files

The ClinVar .nsa for Nirvana can be built using the SAUtils command's clinvar subcommand.

Source data files

Two input .xml files and a .version file are required in order to build the .nsa file. You should have the following files:

ClinVarFullRelease_2021-06.xml.gz       ClinVarVariationRelease_2021-06.xml.gz
ClinVarFullRelease_2021-06.xml.gz.version

The version file is a text file with the following format.

NAME=ClinVar
VERSION=20210603
DATE=2021-06-03
DESCRIPTION=A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence

The help menu for the utility is as follows:

dotnet SAUtils.dll clinvar
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll clinvar [options]
Creates a supplementary database with ClinVar annotations

OPTIONS:
--ref, -r <VALUE> compressed reference sequence file
--rcv, -i <VALUE> ClinVar Full release XML file
--vcv, -c <VALUE> ClinVar Variation release XML file
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

dotnet SAUtils.dll clinvar

Here is a sample execution:

dotnet ~/development/Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll clinvar \
--ref ~/development/References/7/Homo_sapiens.GRCh38.Nirvana.dat --rcv ClinVarFullRelease_2021-06.xml.gz \
--vcv ClinVarVariationRelease_2021-06.xml.gz --out ~/development/SupplementaryDatabase/63/GRCh38
---------------------------------------------------------------------------
SAUtils (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.13.0
---------------------------------------------------------------------------

Found 983417 VCV records
Chromosome 1 completed in 00:09:46.2
Chromosome 2 completed in 00:00:16.4
Chromosome 3 completed in 00:00:06.9
Unknown vcv id:982521 found in RCV001262095.1
Chromosome 4 completed in 00:00:03.9
Chromosome 5 completed in 00:00:07.1
Chromosome 6 completed in 00:00:05.7
Chromosome 7 completed in 00:00:06.6
Unknown vcv id:430873 found in RCV000493222.1
Chromosome 8 completed in 00:00:04.6
Chromosome 9 completed in 00:00:06.2
Chromosome 10 completed in 00:00:05.6
Chromosome 11 completed in 00:00:10.2
Chromosome 12 completed in 00:00:06.9
Chromosome 13 completed in 00:00:05.9
Chromosome 14 completed in 00:00:04.9
Chromosome 15 completed in 00:00:05.4
Chromosome 16 completed in 00:00:08.9
Chromosome 17 completed in 00:00:13.1
Chromosome 18 completed in 00:00:02.4
Chromosome 19 completed in 00:00:07.6
Chromosome 20 completed in 00:00:02.4
Chromosome 21 completed in 00:00:01.6
Chromosome 22 completed in 00:00:02.6
Chromosome MT completed in 00:00:00.3
Chromosome X completed in 00:00:05.5
2 unknown VCVs found in RCVs.
982521,430873
Chromosome Y completed in 00:00:00.0

Time: 00:12:08.2

- - - - \ No newline at end of file diff --git a/3.16/data-sources/cosmic-json/index.html b/3.16/data-sources/cosmic-json/index.html deleted file mode 100644 index 46244cd4..00000000 --- a/3.16/data-sources/cosmic-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -cosmic-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

cosmic-json

   "cosmicGeneFusions":[
{
"id":"COSF881",
"numSamples":6,
"geneSymbols":[
"MYB",
"NFIB"
],
"hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",
"histologies":[
{
"name":"adenoid cystic carcinoma",
"numSamples":6
}
],
"sites":[
{
"name":"salivary gland (submandibular)",
"numSamples":1
},
{
"name":"salivary gland (parotid)",
"numSamples":1
},
{
"name":"salivary gland (nasal cavity)",
"numSamples":1
},
{
"name":"breast",
"numSamples":3
}
],
"pubMedIds":[
19841262
]
}
]
FieldTypeNotes
idstringCOSMIC fusion ID
numSamplesint
geneSymbolsstring array5' gene & 3' gene
hgvsrstringHGVS RNA translocation fusion notation
histologiescount arrayphenotypic descriptions
sitescount arraytissue types
pubMedIdsint arrayPubMed IDs

Count

FieldTypeNotes
namestringdescription
numSamplesint
- - - - \ No newline at end of file diff --git a/3.16/data-sources/cosmic/index.html b/3.16/data-sources/cosmic/index.html deleted file mode 100644 index 3ce4d991..00000000 --- a/3.16/data-sources/cosmic/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -COSMIC | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

COSMIC

Overview

COSMIC, the Catalogue of Somatic Mutations in Cancer, is the world's largest source of expert manually curated somatic mutation information relating to human cancers.

Publication

John G Tate, Sally Bamford, Harry C Jubb, Zbyslaw Sondka, David M Beare, Nidhi Bindal, Harry Boutselakis, Charlotte G Cole, Celestino Creatore, Elisabeth Dawson, Peter Fish, Bhavana Harsha, Charlie Hathaway, Steve C Jupe, Chai Yin Kok, Kate Noble, Laura Ponting, Christopher C Ramshaw, Claire E Rye, Helen E Speedy, Ray Stefancsik, Sam L Thompson, Shicai Wang, Sari Ward, Peter J Campbell, Simon A Forbes. (2019) COSMIC: the Catalogue Of Somatic Mutations In Cancer, Nucleic Acids Research, Volume 47, Issue D1

Licensed Content

Commercial companies are required to acquire a license from COSMIC. At the moment, this means that our COSMIC content is only available in Illumina's products and services, not in the open source distribution.

Since many of you are academic users, we will enable a COSMIC login in our downloader later this year that will allow academic and commercial organizations (with a license) to access our COSMIC data sources.

Gene Fusions

Gene fusions are manually curated from peer reviewed publications by expert COSMIC curators. A comprehensive literature curation is completed for each fusion pair when it is released in the database. Currently COSMIC includes information on fusions involved in solid tumours and leukaemias.

TSV File

Example

SAMPLE_ID       SAMPLE_NAME     PRIMARY_SITE    SITE_SUBTYPE_1  SITE_SUBTYPE_2  SITE_SUBTYPE_3  PRIMARY_HISTOLOGY      HISTOLOGY_SUBTYPE_1      HISTOLOGY_SUBTYPE_2     HISTOLOGY_SUBTYPE_3     FUSION_ID       TRANSLOCATION_NAME      5'_CHROMOSOME   5'_STRAND       5'_GENE_ID      5'_GENE_NAME    5'_LAST_OBSERVED_EXON   5'_GENOME_START_FROM    5'_GENOME_START_TO      5'_GENOME_STOP_FROM     5'_GENOME_STOP_TO       3'_CHROMOSOME   3'_STRAND       3'_GENE_ID      3'_GENE_NAME   3'_FIRST_OBSERVED_EXON   3'_GENOME_START_FROM    3'_GENOME_START_TO      3'_GENOME_STOP_FROM     3'_GENOME_STOP_TO      FUSION_TYPE      PUBMED_PMID
749711 HCC1187 breast NS NS NS carcinoma ductal_carcinoma NS NS 665 ENST00000360863.10(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452 8 - 197199 RGS22 22 99981937 99981937 100106116 100106116 1 + 212470 SYCP1_ENST00000369518 24 114944339 114944339 114995367 114995367 Inferred Breakpoint 20033038

Parsing

From the TSV file, we're mainly interested in the following columns:

  • SAMPLE_ID
  • PRIMARY_SITE
  • PRIMARY_HISTOLOGY
  • HISTOLOGY_SUBTYPE_1
  • FUSION_ID
  • TRANSLOCATION_NAME
  • PUBMED_PMID
info

For all the histologies and sites, we replace all the underscores with spaces. For example, salivary_gland would become salivary gland.

Aggregation

To create the gene fusion entries in Nirvana, we perform the following on each row in the TSV file:

  • Group all entries by FUSION_ID
  • Using all the entries related to this FUSION_ID:
    • Collect all the PubMed IDs
    • Tally the number of observed sample IDs
    • Grab the HGVS r. notation (should not change throughout the FUSION_ID)
    • Tally the number of samples observed for each histology
    • Tally the number of samples observed for each site
  • Extract the transcript IDs from the HGVS notation and lookup the associated gene symbols

Fixing the HGVS RNA Notation

ENST00000360863.6(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452

There are some issues with the HGVS RNA notation:

  • The two transcripts should be linked by a double colon ::.
  • For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusions.
  • If only the breakpoint is truly known, the recommendation is to use ? marks

We chose to only update the linkage between each transcript using double colons ::. While we could have recalculated the HGVS notation using the supplied breakpoints, we chose not to because the resulting notation would be quite different from the original material. This would potentially lead to some confusion.

Aggregating Histologies

For histologies we want to capture the most specific description available. In the example above, we saw that the primary histology was carcinoma, but the subtype was ductal carcinoma. In this case we would use the subtype for the annotation.

COSMIC uses NS to show that a value is empty. If the subtype is NS, we will use the primary histology instead.

Aggregating Sites

For sites, we observe that the subtype provides additional description but is still dependent on the primary site value. For example, the primary site might be skin, but the subtype is foot. Therefore, we will combine the values in the following manner: skin (foot).

Known Issues

Known Issues

There are some issues with the HGVS RNA notation:

  • The two transcripts should be linked by a double colon ::. We fixed this aspect in Nirvana.
  • For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusions.

Download URL

JSON Output

   "cosmicGeneFusions":[
{
"id":"COSF881",
"numSamples":6,
"geneSymbols":[
"MYB",
"NFIB"
],
"hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",
"histologies":[
{
"name":"adenoid cystic carcinoma",
"numSamples":6
}
],
"sites":[
{
"name":"salivary gland (submandibular)",
"numSamples":1
},
{
"name":"salivary gland (parotid)",
"numSamples":1
},
{
"name":"salivary gland (nasal cavity)",
"numSamples":1
},
{
"name":"breast",
"numSamples":3
}
],
"pubMedIds":[
19841262
]
}
]
FieldTypeNotes
idstringCOSMIC fusion ID
numSamplesint
geneSymbolsstring array5' gene & 3' gene
hgvsrstringHGVS RNA translocation fusion notation
histologiescount arrayphenotypic descriptions
sitescount arraytissue types
pubMedIdsint arrayPubMed IDs

Count

FieldTypeNotes
namestringdescription
numSamplesint
- - - - \ No newline at end of file diff --git a/3.16/data-sources/dbsnp-json/index.html b/3.16/data-sources/dbsnp-json/index.html deleted file mode 100644 index e6aba178..00000000 --- a/3.16/data-sources/dbsnp-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbsnp-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

dbsnp-json

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.16/data-sources/dbsnp/index.html b/3.16/data-sources/dbsnp/index.html deleted file mode 100644 index 0c3af902..00000000 --- a/3.16/data-sources/dbsnp/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbSNP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

dbSNP

Overview

dbSNP contains human single nucleotide variations, microsatellites, and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations.

Publication

Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP—Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. Genome Res., 9, 677–679.

VCF File

Example

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 10177 rs367896724 A AC . . RS=367896724;RSPOS=10177;dbSNPBuildID=138; \
SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \
VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \
TOPMED=0.76728147298674821,0.23271852701325178

Parsing

From the VCF file, we're mainly interested in the following:

  • rsID from the ID field
  • CAF from the INFO field

Global allele extraction

The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values).

Tie Breaking: Global Major Allele

If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele.

Tie Breaking: Global Minor Allele

If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily.

Equal Allele Frequency Example (2 alleles)

chr1    100 A   C   CAF=0.5,0.5

We will select A to be the global major allele and C to be the global minor allele.

Equal Allele Frequency Example (3 alleles)

chr1    100 A   C,T CAF=0.33,0.33,0.33

We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele.

Equal Allele Frequency in Alternate Alleles

chr1    100 A   C,T CAF=0.2,0.4,0.4

C and T are tied for the highest frequency, so one of them is arbitrarily chosen as the global major allele and the other becomes the global minor allele.

Equal Allele Frequency Between Reference & Alternate Allele

chr1    100 A   C,T CAF=0.2,0.2,0.6

We will select T to be the global major allele and C to be the global minor allele.

Known Issues

Known Issues

If there are multiple entries with different CAF values for the same allele, we use the first CAF value.

Download URL

https://ftp.ncbi.nih.gov/snp/organisms/

JSON Output

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.16/data-sources/fusioncatcher-json/index.html b/3.16/data-sources/fusioncatcher-json/index.html deleted file mode 100644 index 50c761a7..00000000 --- a/3.16/data-sources/fusioncatcher-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -fusioncatcher-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

fusioncatcher-json

   "fusionCatcher":[
{
"genes":{
"first":{
"hgnc":"ETV6",
"isOncogene":true
},
"second":{
"hgnc":"RUNX1"
},
"isParalogPair":true,
"isPseudogenePair":true,
"isReadthrough":true
},
"germlineSources":[
"1000 Genomes Project"
],
"somaticSources":[
"COSMIC",
"TCGA oesophageal carcinomas"
]
}
]
FieldTypeNotes
genesgenes object5' gene & 3' gene
germlineSourcesstring arraymatches in known germline data sources
somaticSourcesstring arraymatches in known somatic data sources

genes

FieldTypeNotes
firstgene object5' gene
secondgene object3' gene
isParalogPairbooltrue when both genes are paralogs of each other
isPseudogenePairbooltrue when both genes are pseudogenes of each other
isReadthroughbooltrue when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)

gene

FieldTypeNotes
hgncstringgene symbol. e.g. MSH6
isOncogenebooltrue when this gene is an oncogene
- - - - \ No newline at end of file diff --git a/3.16/data-sources/fusioncatcher/index.html b/3.16/data-sources/fusioncatcher/index.html deleted file mode 100644 index 0f9b96e6..00000000 --- a/3.16/data-sources/fusioncatcher/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -FusionCatcher | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

FusionCatcher

Overview

FusionCatcher is a well-known tool that searches for somatic novel/known fusion genes, translocations, and/or chimeras in RNA-seq data. While FusionCatcher itself is not part of Nirvana, we have included a subset of their genomic databases in Nirvana.

Publication

Daniel Nicorici, Mihaela Şatalan, Henrik Edgren, Sara Kangaspeska, Astrid Murumägi, Olli Kallioniemi, Sami Virtanen, Olavi Kilkku. (2014) FusionCatcher – a tool for finding somatic fusion genes in paired-end RNA-sequencing data. bioRxiv 011650

Supported Data Sources

Oncogenes

The following data sources are aggregated and used to populate the isOncogene field in the gene JSON object:

DescriptionReferenceDataFusionCatcher filename
Bushmanbushmanlab.orgcancer_genes.txt
ONGENEJGGbioinfo-minzhao.orgoncogenes_more.txt
UniProt tumor genesNARuniprot.orgtumor_genes.txt

Germline

Nirvana labelReferenceDataFusionCatcher filename
1000 Genomes ProjectPLOS ONE1000genomes.txt
Healthy (strong support)banned.txt
Illumina Body Map 2.0EBIbodymap2.txt
CACGGenomicscacg.txt
ConjoinGPLOS ONEconjoing.txt
Healthy prefrontal cortexBMC Medical GenomicsNCBI GEOcortex.txt
Duplicated Genes DatabasePLOS ONEgenouest.orgdgd.txt
GTEx healthy tissuesgtexportal.orggtex.txt
Healthyhealthy.txt
Human Protein AtlasMCPEBIhpa.txt
Babiceanu non-cancer tissuesNARNARnon-cancer_tissues.txt
non-tumor cell linesnon-tumor_cells.txt
TumorFusions normalNARNARtcga-normal.txt

Somatic

Nirvana labelReferenceDataFusionCatcher filename
Alaei-Mahabadi 18 cancersPNAS18cancers.txt
DepMap CCLEdepmap.orgccle.txt
CCLE KlijnNature BiotechnologyNature Biotechnologyccle2.txt
CCLE VellichirammalMolecular Therapy Nucleic Acidsccle3.txt
Cancer Genome ProjectCOSMICcgp.txt
ChimerKB 4.0NARkobic.re.krchimerdb4kb.txt
ChimerPub 4.0NARkobic.re.krchimerdb4pub.txt
ChimerSeq 4.0NARkobic.re.krchimerdb4seq.txt
COSMICNARCOSMICcosmic.txt
Bao gliomasGenome Researchgliomas.txt
Knownknown.txt
Mitelman DBISB-CGCGoogle Cloudmitelman.txt
TCGA oesophageal carcinomasNatureoesophagus.txt
Bailey pancreatic cancersNatureNaturepancreases.txt
PCAWGCellICGCpcawg.txt
Robinson prostate cancersCellCellprostate_cancer.txt
TCGAcancer.govtcga.txt
TumorFusions tumorNARNARtcga-cancer.txt
TCGA GaoCellCelltcga2.txt
TCGA VellichirammalMolecular Therapy Nucleic Acidstcga3.txt
TICdbBMC Genomicsunav.eduticdb.txt

Gene Pair TSV File

Most of the data files in FusionCatcher are two-column TSV files containing the Ensembl gene IDs that are paired together.

Example

Here are the first few lines of the 1000genomes.txt file:

ENSG00000006210 ENSG00000102962
ENSG00000006652 ENSG00000181016
ENSG00000014138 ENSG00000149798
ENSG00000026297 ENSG00000071242
ENSG00000035499 ENSG00000155959
ENSG00000055211 ENSG00000131013
ENSG00000055332 ENSG00000179915
ENSG00000062485 ENSG00000257727
ENSG00000065978 ENSG00000166501
ENSG00000066044 ENSG00000104980

Parsing

In Nirvana, we will only import a gene pair if both Ensembl gene IDs are recognized from either our GRCh37 or GRCh38 cache files.

Gene TSV File

Some of the data files are single-column files containing Ensembl gene IDs. This is commonly used in the data files representing oncogene data sources.

Example

Here are the first few lines of the oncogenes_more.txt file:

ENSG00000000938
ENSG00000003402
ENSG00000005469
ENSG00000005884
ENSG00000006128
ENSG00000006453
ENSG00000006468
ENSG00000007350
ENSG00000008294
ENSG00000008952

Parsing

Known Issues

Known Issues

FusionCatcher also creates custom Ensembl genes (e.g. ENSG09000000002) to handle missing Ensembl genes. Nirvana will ignore these entries since we only include the gene IDs that are currently recognized by Nirvana.

I suspect that these were originally RefSeq genes and if so, we can support those directly in Nirvana in the future.

Download URL

https://sourceforge.net/projects/fusioncatcher/files/data

JSON Output

   "fusionCatcher":[
{
"genes":{
"first":{
"hgnc":"ETV6",
"isOncogene":true
},
"second":{
"hgnc":"RUNX1"
},
"isParalogPair":true,
"isPseudogenePair":true,
"isReadthrough":true
},
"germlineSources":[
"1000 Genomes Project"
],
"somaticSources":[
"COSMIC",
"TCGA oesophageal carcinomas"
]
}
]
FieldTypeNotes
genesgenes object5' gene & 3' gene
germlineSourcesstring arraymatches in known germline data sources
somaticSourcesstring arraymatches in known somatic data sources

genes

FieldTypeNotes
firstgene object5' gene
secondgene object3' gene
isParalogPairbooltrue when both genes are paralogs of each other
isPseudogenePairbooltrue when both genes are pseudogenes of each other
isReadthroughbooltrue when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)

gene

FieldTypeNotes
hgncstringgene symbol. e.g. MSH6
isOncogenebooltrue when this gene is an oncogene
- - - - \ No newline at end of file diff --git a/3.16/data-sources/gnomad-lof-json/index.html b/3.16/data-sources/gnomad-lof-json/index.html deleted file mode 100644 index 7567cbc4..00000000 --- a/3.16/data-sources/gnomad-lof-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-lof-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

gnomad-lof-json

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)
- - - - \ No newline at end of file diff --git a/3.16/data-sources/gnomad-small-variants-json/index.html b/3.16/data-sources/gnomad-small-variants-json/index.html deleted file mode 100644 index d5b2fdf4..00000000 --- a/3.16/data-sources/gnomad-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

gnomad-small-variants-json

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.
- - - - \ No newline at end of file diff --git a/3.16/data-sources/gnomad/index.html b/3.16/data-sources/gnomad/index.html deleted file mode 100644 index 5aae3233..00000000 --- a/3.16/data-sources/gnomad/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomAD | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

gnomAD

Overview

The Genome Aggregation Database (gnomAD) is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community.

Publication

Koch, L., 2020. Exploring human genomic diversity with gnomAD. Nature Reviews Genetics, 21(8), pp.448-448.

Small Variants

VCF extraction

We currently extract the following info fields from gnomAD genome and exome VCF files:

##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate allele count for samples">
##INFO=<ID=AN,Number=A,Type=Integer,Description="Total number of alleles in samples">
##INFO=<ID=nhomalt,Number=A,Type=Integer,Description="Count of homozygous individuals in samples">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Depth of informative coverage for each sample; reads with MQ=255 or with bad mates are filtered">
##INFO=<ID=lcr,Number=0,Type=Flag,Description="Variant falls within a low complexity region">
##INFO=<ID=AC_afr,Number=A,Type=Integer,Description="Alternate allele count for samples of African-American ancestry">
##INFO=<ID=AN_afr,Number=A,Type=Integer,Description="Total number of alleles in samples of African-American ancestry">
##INFO=<ID=AF_afr,Number=A,Type=Float,Description="Alternate allele frequency in samples of African-American ancestry">
##INFO=<ID=nhomalt_afr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of African-American ancestry">
##INFO=<ID=AC_amr,Number=A,Type=Integer,Description="Alternate allele count for samples of Latino ancestry">
##INFO=<ID=AN_amr,Number=A,Type=Integer,Description="Total number of alleles in samples of Latino ancestry">
##INFO=<ID=nhomalt_amr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Latino ancestry">
##INFO=<ID=AC_eas,Number=A,Type=Integer,Description="Alternate allele count for samples of East Asian ancestry">
##INFO=<ID=AN_eas,Number=A,Type=Integer,Description="Total number of alleles in samples of East Asian ancestry">
##INFO=<ID=nhomalt_eas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of East Asian ancestry">
##INFO=<ID=AC_female,Number=A,Type=Integer,Description="Alternate allele count for female samples">
##INFO=<ID=AN_female,Number=A,Type=Integer,Description="Total number of alleles in female samples">
##INFO=<ID=nhomalt_female,Number=A,Type=Integer,Description="Count of homozygous individuals in female samples">
##INFO=<ID=AC_nfe,Number=A,Type=Integer,Description="Alternate allele count for samples of non-Finnish European ancestry">
##INFO=<ID=AN_nfe,Number=A,Type=Integer,Description="Total number of alleles in samples of non-Finnish European ancestry">
##INFO=<ID=nhomalt_nfe,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of non-Finnish European ancestry">
##INFO=<ID=AC_fin,Number=A,Type=Integer,Description="Alternate allele count for samples of Finnish ancestry">
##INFO=<ID=AN_fin,Number=A,Type=Integer,Description="Total number of alleles in samples of Finnish ancestry">
##INFO=<ID=nhomalt_fin,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Finnish ancestry">
##INFO=<ID=AC_asj,Number=A,Type=Integer,Description="Alternate allele count for samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AN_asj,Number=A,Type=Integer,Description="Total number of alleles in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=nhomalt_asj,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AC_oth,Number=A,Type=Integer,Description="Alternate allele count for samples of uncertain ancestry">
##INFO=<ID=AN_oth,Number=A,Type=Integer,Description="Total number of alleles in samples of uncertain ancestry">
##INFO=<ID=nhomalt_oth,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of uncertain ancestry">
##INFO=<ID=AC_male,Number=A,Type=Integer,Description="Alternate allele count for male samples">
##INFO=<ID=AN_male,Number=A,Type=Integer,Description="Total number of alleles in male samples">
##INFO=<ID=nhomalt_male,Number=A,Type=Integer,Description="Count of homozygous individuals in male samples">
##INFO=<ID=controls_AC,Number=A,Type=Integer,Description="Alternate allele count for samples in the controls subset">
##INFO=<ID=controls_AN,Number=A,Type=Integer,Description="Total number of alleles in samples in the controls subset">

We also extract the following extra fields from gnomAD exome VCF file:

##INFO=<ID=AC_sas,Number=A,Type=Integer,Description="Alternate allele count for samples of South Asian ancestry">
##INFO=<ID=AN_sas,Number=A,Type=Integer,Description="Total number of alleles in samples of South Asian ancestry">
##INFO=<ID=nhomalt_sas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of South Asian ancestry">

Computation

Using these, we compute the following:

  • Coverage
  • Allele count, Homozygous count, allele number and allele frequencies for:
    • Global population
    • African/African Americans
    • Admixed Americans
    • Ashkenazi Jews
    • East Asians
    • Finnish
    • Non-Finnish Europeans
    • South Asian
    • Others (population not assigned)
    • Male
    • Female
    • Controls
Note
  • Coverage = DP / AN. Frequencies are computed using AC/AN for each population.
  • Please note that gnomAD currently contains no genome sequencing data for the South Asian (SAS) population.
  • Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.

Merging genomes and exomes

When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets.

info
  • For GRCh37, Nirvana currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output.
  • For GRCh38, Nirvana currently uses gnomAD version 3.0 which doesn't contain the exomes data. Therefore, only genomes data are presented in the output.

Filters

The following strategy will be used when there's a conflict in filter status:

Genomes PASSGenomes Filtered
Exomes PASSPASSOnly use exome data
Exomes FilteredOnly use genome dataFiltered

VCF download instructions

https://gnomad.broadinstitute.org/downloads

JSON output

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.

LoF Gene Metrics

Tab delimited file example

gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_z mis_z lof_z oe_lof_upper_rank oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfe p_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof exac_exp_lof exac_oe_lof brain_expression chromosome start_position end_position
MED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643

JSON key to TSV column mapping

JSON keyTSV columnDescription
pLipLIprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullpNullprobability of being completely tolerant of loss of function variation (observed = expected)
pRecpRecprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZsyn_zcorrected synonymous Z score
misZmis_zcorrected missense Z score
loeufoe_lof_upperloss of function observed/expected upper bound fraction (LOEUF)

Gene symbol update

The input file provides Ensembl gene IDs for each entry. We observed that they were unique while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene IDs are more stable, and Nirvana transcript cache data contains Ensembl gene IDs, we use these IDs to extract the gene symbols from the transcript cache. For example, if ENSG0001 has gene symbol GENE1 in the input but the Nirvana cache says ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry.

Conflict resolution

gnomAD uses Ensembl GeneID as unique identifiers in the source file but Nirvana uses HGNC gene symbols. Multiple Ensembl GeneIDs can map to the same HGNC symbol and therefore may result in a conflict.

MDGA2   ENST00000426342 306 4.0043e+02  7.6419e-01  2.1096e-05  4724    78  1.6525e+02  4.7202e-01  1923    125 1.3737e+02  9.0993e-01  7.1973e-06  1413    4   2.0926e-06  453 3.8316e+01  9.9922e-01  8.6490e-12  7.8128e-04  1.0440e-01  7.8600e-01  1.0560e+00  6.9500e-01  8.4000e-01  5.0000e-02  2.3900e-01      8.2988e-01  1.6769e+00  5.1372e+00  1529    0   0   7   2.8103e-05  4.0317e-06  124784  7   0   124791  2.8047e-05  9.8167e-05  0.0000e+00  2.8962e-05  0.0000e+00  0.0000e+00  0.0000e+00  3.5391e-05  1.6672e-04  3.2680e-05  0.0000e+00  2.8962e-05  0.0000e+00  0.0000e+00  0.0000e+00  3.5308e-05  1.6492e-04  3.2678e-05  protein_coding  ENSG00000139915 2   2181    13  protein_coding  835332  9.9322e-01  3   2.7833e+01  1.0779e-01  NA  14  47308826    48144157
MDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 47311134 48143999

In such cases, Nirvana chooses the entry with the smallest "LOEUF" value. The reason for choosing this value can be highlighted by the following table:

LOEUF decileHaplo-insufficientAutosomal DominantAutosomal RecessiveOlfactory Genes
0-10%104140360
10-20%47128721
20-30%17861120
30-40%8801734
40-50%7652068
50-60%4542076
60-70%04615418
70-80%24912049
80-90%0345896
90-100%02640174
Note

List of genes with conflicting entries

MDGA2:
{"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}
{"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}
CRYBG3:
{"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}
{"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}
CHTF8:
{"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}
{"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}
SEPT1:
{"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}
{"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}
ARL14EPL:
{"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}
{"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}
UGT2A1:
{"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}
{"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}
LTB4R2:
{"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}
{"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}
CDRT1:
{"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}
{"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}
MUC3A:
{"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}
{"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}
COG8:
{"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}
{"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}
AC006486.1:
{"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}
{"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}
AL645922.1:
{"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}
{"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}
NBPF20:
{"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}
{"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}
PRAMEF11:
{"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}
{"synZ":-3.33e0,"misZ":-2.59e0}
FAM231D:
{"synZ":-1.98e0,"misZ":-1.44e0}
{"synZ":1.07e0,"misZ":3.13e-1}

Conflict resolution

  • Pick the entry with the lowest LOEUF score
  • If the same, pick the lowest pLI
  • Otherwise pick the entry with the max absolute value of synZ + misZ

Download URL

https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz

JSON output

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)
- - - - \ No newline at end of file diff --git a/3.16/data-sources/mito-heteroplasmy/index.html b/3.16/data-sources/mito-heteroplasmy/index.html deleted file mode 100644 index 9043d307..00000000 --- a/3.16/data-sources/mito-heteroplasmy/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Mitochondrial Heteroplasmy | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Mitochondrial Heteroplasmy

Overview

Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline.

JSON File

Example

{
"T:C":{
"ad":[
1,
1,
1,
1,
1,
1
],
"allele_type":"alt",
"vrf":[
0.002369668246445498,
0.0024937655860349127,
0.0016129032258064516,
0.0025188916876574307,
0.0022935779816513763,
0.002008032128514056
],
"vrf_stats":{
"kurtosis":38.889891511122556,
"max":0.0025188916876574307,
"mean":5.4052190471990743e-05,
"min":0.0,
"nobs":246,
"skewness":6.346664692283075,
"stdev":0.0003461416264750575,
"variance":1.1981402557879823e-07
}
}
}

Parsing

From the JSON file, we're mainly interested in the following keys:

  • variant (i.e. T:C)
  • ad
  • vrf
  • nobs (number of observations)
Adjusting for null observations

The nobs value indicates how many observations were made. Ideally this would have been represented in the ad and vrf arrays, but it's left as an exercise for the reader.

Binning VRF Data

The vrf (variant read frequency) array in the JSON object above is paired with the ad array (allele depths) shown above.

The data in the JSON object has an excessive number of significant digits. This means that as the number of samples increases, this array will grow. To make this more future-proof, Nirvana bins everything according to 0.1% increments.

With the binned data, we end up having 775 distinct vrf values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143.

Pre-processing the Data

The JSON file is converted into a small TSV file that is embedded in Nirvana. Here is an example of the TSV file:

#CHROM  POS REF ALT VRF_BINS    VRF_COUNTS
chrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736
chrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736

Algorithm

Nirvana will calculate mitochondrial heteroplasmy data for every sample in the VCF. Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile.

Percentiles

Nirvana uses the statistical definition of percentile (indicating the value below which a given percentage of observations in a group of observations falls). Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1).

Download URL

Unavailable

The original data set is only available internally at Illumina at the moment.

JSON Output

"samples":[
{
"genotype":"0/1",
"variantFrequencies":[
0.333,
0.5
],
"alleleDepths":[
10,
20,
30
],
"heteroplasmyPercentile":[
23.13,
12.65
]
}
]
FieldTypeNotes
heteroplasmyPercentilefloat arrayone percentile for each variant frequency (each alternate allele)
- - - - \ No newline at end of file diff --git a/3.16/data-sources/mitomap-small-variants-json/index.html b/3.16/data-sources/mitomap-small-variants-json/index.html deleted file mode 100644 index 0f7ae994..00000000 --- a/3.16/data-sources/mitomap-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -mitomap-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

mitomap-small-variants-json

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele
- - - - \ No newline at end of file diff --git a/3.16/data-sources/mitomap-structural-variants-json/index.html b/3.16/data-sources/mitomap-structural-variants-json/index.html deleted file mode 100644 index e533d4bc..00000000 --- a/3.16/data-sources/mitomap-structural-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -mitomap-structural-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

mitomap-structural-variants-json

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring array
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
- - - - \ No newline at end of file diff --git a/3.16/data-sources/mitomap/index.html b/3.16/data-sources/mitomap/index.html deleted file mode 100644 index ee915291..00000000 --- a/3.16/data-sources/mitomap/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -MITOMAP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

MITOMAP

Overview

MITOMAP provides a compendium of polymorphisms and mutations in human mitochondrial DNA.

Publication

Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. Current Protocols in Bioinformatics 1(123):1.23.1-26 (2013). http://www.mitomap.org

Scraping HTML Pages

Example

MITOMAP is unique in that it doesn't offer the data in a downloadable format. As a result, the annotation content in Nirvana is scraped from the following MITOMAP pages:

  1. mtDNA Control Region Sequence Variants
  2. mtDNA Coding Region & RNA Sequence Variants
  3. Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations
  4. Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations
  5. Reported mtDNA Deletions
  6. mtDNA Simple Insertions

Parsing

Here's what the HTML code looks like:

["582","<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>","Mitochondrial myopathy","T582C","tRNA Phe","-","+","Reported","<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=582&alt=C&quart=2'><u>72.90%</u></a> <i class='fa fa-arrow-up' style='color:orange' aria-hidden='true'></i></span>","0","<a href='/cgi-bin/print_ref_list?refs=90165,91590&title=RNA+Mutation+T582C' target='_blank'>2</a>"],
["583","<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>","MELAS / MM & EXIT","G583A","tRNA Phe","-","+","Cfrm","<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=583&alt=A&quart=0'><u>93.10%</u></a> <i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i></span>","0","<a href='/cgi-bin/print_ref_list?refs=2066,90532,91590&title=RNA+Mutation+G583A' target='_blank'>3</a>"],

We're mainly interested in the following columns (numbers indicate the HTML page above):

  • Position1,2,3,4
  • Disease3,4
  • Nucleotide Change1,2
  • Allele3,4
  • Homoplasmy3,4
  • Heteroplasmy3,4
  • Status3,4
  • MitoTIP3,4
  • GB Seqs FL(CR)1,2,3,4
  • Deletion Junction5
  • Insert (nt)6
  • Insert Point (nt)6
  • References/Curated References1,2,3,4
MitoTIP

The MitoTIP information is used to populate the clinicalSignificance and scorePercentile JSON keys. The "frequency alert" entries are skipped since they are not directly relevant to clinical significance.

Left alignment

Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions.

Variant Enumeration

Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are C-C(2-8) and A-AC or ACC. Alternate alleles containing IUPAC ambiguity codes are similarly enumerated.

Inversions

MITOMAP inversions are currently treated as MNVs.

Allele Parsing

The following MITOMAP allele parsing conventions are supported:

  • C123T
  • 16021_16022del
  • 8042del2
  • C9537insC
  • 3902_3908invACCTTGC
  • A-AC or ACC
  • C-C(2-8)
  • 8042delAT

PostgreSQL Dump File

Example

COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;
1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177
2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534

Parsing

From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:

  • id
  • nlmid
Why not use the PostgreSQL file for everything?

Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in.

Known Issues

Duplicated records

Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown.

  • For diseases and PubMed IDs, we take the union of the values in the duplicated records.
  • For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.
Skipped records

Records that represent an alternate notation of the original variant are skipped. Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped.

Download URLs

JSON Output

Small Variants

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele

Structural Variants

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring array
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
- - - - \ No newline at end of file diff --git a/3.16/data-sources/omim-json/index.html b/3.16/data-sources/omim-json/index.html deleted file mode 100644 index 8df1679c..00000000 --- a/3.16/data-sources/omim-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -omim-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

omim-json

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping
- - - - \ No newline at end of file diff --git a/3.16/data-sources/omim/index.html b/3.16/data-sources/omim/index.html deleted file mode 100644 index 2f73e179..00000000 --- a/3.16/data-sources/omim/index.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - -OMIM | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

OMIM

Overview

OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily.

Publications

Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: 30445645.

Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM®), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. PMID: 25428349.

Parse OMIM data

Nirvana uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Nirvana. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to a Nirvana gene symbol are further processed. The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols.

mim2gene.txt

This mim2gene.txt (http://omim.org/static/omim/data/mim2gene.txt) file provides the mapping between MIM numbers and gene symbols. An example of this file is given below:

# MIM Number    MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq)   Entrez Gene ID (NCBI)   Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)
100050 predominantly phenotypes
100070 phenotype 100329167
100100 phenotype
100200 predominantly phenotypes
100300 phenotype
100500 moved/removed
100600 phenotype
100640 gene 216 ALDH1A1 ENSG00000165092
100650 gene/phenotype 217 ALDH2 ENSG00000111275
100660 gene 218 ALDH3A1 ENSG00000108602
100670 gene 219 ALDH1B1 ENSG00000137124
100675 predominantly phenotypes
100678 gene 39 ACAT2 ENSG00000120437

The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns is used to find the proper gene symbol supported by Nirvana, which may or may not be the same as the gene symbol listed here.

OMIM API

Nirvana retrieves the OMIM annotations from the OMIM API JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. A sample JSON response from the API is provided below.

{
"omim": {
"version": "1.0",
"entryList": [
{
"entry": {
"prefix": "*",
"mimNumber": 100640,
"status": "live",
"titles": {
"preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",
"alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\nACETALDEHYDE DEHYDROGENASE 1;;\nALDH, LIVER CYTOSOLIC;;\nRETINAL DEHYDROGENASE 1; RALDH1"
},
"textSectionList": [
{
"textSection": {
"textSectionName": "description",
"textSectionTitle": "Description",
"textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\n\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."
}
}
],
"geneMap": {
"sequenceID": 7709,
"chromosome": 9,
"chromosomeSymbol": "9",
"chromosomeSort": 225,
"chromosomeLocationStart": 72900670,
"chromosomeLocationEnd": 72953052,
"transcript": "ENST00000297785.7",
"cytoLocation": "9q21",
"computedCytoLocation": "9q21.13",
"mimNumber": 100640,
"geneSymbols": "ALDH1A1",
"geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",
"mappingMethod": "REa, A",
"confidence": "P",
"mouseGeneSymbol": "Aldh1a1",
"mouseMgiID": "MGI:1353450",
"geneInheritance": null
},
"externalLinks": {
"geneIDs": "216",
"hgncID": "402",
"ensemblIDs": "ENSG00000165092,ENST00000297785.8",
"approvedGeneSymbols": "ALDH1A1",
"ncbiReferenceSequences": "1519246465",
"proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",
"uniGenes": "Hs.76392",
"swissProtIDs": "P00352",
"decipherGene": false,
"umlsIDs": "C1412333",
"gtr": true,
"cmgGene": false,
"keggPathways": true,
"gwasCatalog": false

}
}
},
{
"entry": {
"prefix": "*",
"mimNumber": 102560,
"status": "live",
"titles": {
"preferredTitle": "ACTIN, GAMMA-1; ACTG1",
"alternativeTitles": "ACTIN, GAMMA; ACTG;;\nCYTOSKELETAL GAMMA-ACTIN;;\nACTIN, CYTOPLASMIC, 2"
},
"textSectionList": [
{
"textSection": {
"textSectionName": "description",
"textSectionTitle": "Description",
"textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."
}
}
],
"geneMap": {
"sequenceID": 13666,
"chromosome": 17,
"chromosomeSymbol": "17",
"chromosomeSort": 947,
"chromosomeLocationStart": 81509970,
"chromosomeLocationEnd": 81512798,
"transcript": "ENST00000331925.7",
"cytoLocation": "17q25.3",
"computedCytoLocation": "17q25.3",
"mimNumber": 102560,
"geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",
"geneName": "Actin, gamma-1",
"mappingMethod": "REa, A, Fd",
"confidence": "C",
"mouseGeneSymbol": "Actg1",
"mouseMgiID": "MGI:87906",
"geneInheritance": null,
"phenotypeMapList": [
{
"phenotypeMap": {
"mimNumber": 102560,
"phenotype": "Baraitser-Winter syndrome 2",
"phenotypeMimNumber": 614583,
"phenotypicSeriesNumber": "PS243310",
"phenotypeMappingKey": 3,
"phenotypeInheritance": "Autosomal dominant"
}
},
{
"phenotypeMap": {
"mimNumber": 102560,
"phenotype": "Deafness, autosomal dominant 20/26",
"phenotypeMimNumber": 604717,
"phenotypicSeriesNumber": "PS124900",
"phenotypeMappingKey": 3,
"phenotypeInheritance": "Autosomal dominant"
}
}
]
}
}
}
]
}
}

Content from the OMIM API JSON response is reorganized as shown in the Nirvana JSON output section.

Mappings between the Nirvana JSON output and OMIM JSON API are listed in the table below:

Nirvana JSON key chain | OMIM API JSON key chain
omim:mimNumber | omim:entryList:entry:mimNumber
omim:geneName | omim:entryList:entry:geneMap:geneName
omim:description | omim:entryList:entry:textSectionList:textSection:textSectionContent
omim:phenotypes:mimNumber | omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber
omim:phenotypes:phenotype | omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype
omim:phenotypes:description | omim:entryList:entry:textSectionList:textSection:textSectionContent
omim:phenotypes:mapping | omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (see mapping below)
omim:phenotypes:inheritances | omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance
omim:phenotypes:comments | omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (see mapping below)

Mapping key to content

  • 1 to "disorder was positioned by mapping of the wild type gene"
  • 2 to "disease phenotype itself was mapped"
  • 3 to "molecular basis of the disorder is known"
  • 4 to "disorder is a chromosome deletion or duplication syndrome"

Phenotype character to comment

  • ? to "unconfirmed or possibly spurious mapping"
  • [/] to "nondiseases"
  • {/} to "contribute to susceptibility to multifactorial disorders or to susceptibility to infection"

There are different types of links in the OMIM description section. For example, in the above JSON response, we have the description of MIM entry 100640:

The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\n\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).

As the descriptions will be shown as plain text, we remove the curly brackets surrounding links and try to keep the text readable with minimal modifications. Briefly:

  • Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.
  • Links referring to a literature reference will be processed to remove the internal index and curly brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".
  • All the other links will simply have their curly brackets removed. For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".
  • If the content within a pair of parentheses becomes empty after being processed, the parentheses are removed as well and the surrounding whitespace is adjusted accordingly. For example, "ALDH2 ({100650})," will become "ALDH2,".

Here is a list of examples showing how the description section is supposed to be processed:

Original text | Processed text
({516030}, {516040}, and {516050}) | (empty)
(e.g., D1, {168461}; D2, {123833}; D3, {123834}) | (e.g., D1; D2; D3)
(desmocollins; see DSC2, {125645}) | (desmocollins; see DSC2)
(e.g., see {102700}, {300755}) | (empty)
(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}) | (ADH). See also liver mitochondrial ALDH2
(see, e.g., CACNA1A; {601011}) | (see, e.g., CACNA1A)
(e.g., GSTA1; {138359}), mu (e.g., {138350}) | (e.g., GSTA1), mu
(NFKB; see {164011}) | (NFKB)
(see ISGF3G, {147574}) | (see ISGF3G)
(DCK; {EC 2.7.1.74}; {125450}) | (DCK; EC 2.7.1.74)

JSON output

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

Field | Type | Notes
mimNumber | int |
phenotype | string |
description | string |
mapping | string | see possible values below
inheritances | string array | see possible values below
comments | string array | see possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping

Building the supplementary files

The first step in building the OMIM .nga files is to use the SAUtils command's subcommand downloadOMIM to download the necessary data. In order to download the data, the user must possess an API key obtained from OMIM. This key has to be set as the environment variable OmimApiKey.

export OmimApiKey=<users-omim-api-key>
dotnet NirvanaBuild/SAUtils.dll downloadOMIM
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll downloadomim [options]
Download the OMIM gene annotation data

OPTIONS:
--uga, -u <path> universal gene archive path
--ref, -r <filename> input reference filename
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

dotnet NirvanaBuild/SAUtils.dll downloadOMIM --ref References/7/Homo_sapiens.GRCh38.Nirvana.dat --uga Cache/27/UGA.tsv.gz --out ExternalDataSources/OMIM/2021-06-14
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------

Unable to resolve gene symbol conflict for CD300H: Ensembl: [ENSG00000284690]: AC079325.2, Entrez Gene: [100130520]: LOC100130520
Unable to resolve gene symbol conflict for STRIT1: Ensembl: [ENSG00000240045]: DWORF, Entrez Gene: [100507537]: LOC100507537
Unable to resolve gene symbol conflict for WAKMAR2: Ensembl: [ENSG00000237499]: AL357060.2, Entrez Gene: [100130476]: LOC100130476
Unable to resolve gene symbol conflict for PERCC1: Ensembl: [ENSG00000284395]: AL032819.3, Entrez Gene: [105371045]: LOC105371045
Unable to resolve gene symbol conflict for LASTR: Ensembl: [ENSG00000242147]: AL365356.5, Entrez Gene: [105376382]: LOC105376382
Unable to resolve gene symbol conflict for PRANCR: Ensembl: [ENSG00000257815]: LINC01481, Entrez Gene: [101928062]: LOC101928062
Unable to resolve gene symbol conflict for THORLNC: Ensembl: [ENSG00000226856]: AC093901.1, Entrez Gene: [100506797]: LOC100506797
Gene Symbol Update Statistics
============================================
# of gene symbols already up-to-date: 15,952
# of gene symbols updated: 330
# of genes where both IDs are null: 0
# of gene symbols not in cache: 148
# of resolved gene symbol conflicts: 15
# of unresolved gene symbol conflicts: 7

Time: 00:02:38.2

Once the download has succeeded, the nga files can be produced using the SAUtils command's subcommand omim.

dotnet NirvanaBuild/SAUtils.dll omim
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll omim [options]
Creates a gene annotation database from OMIM data

OPTIONS:
--m2g, -m <VALUE> MimToGeneSymbol tsv file
--json, -j <VALUE> OMIM entry json file
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version


dotnet NirvanaBuild/SAUtils.dll omim --m2g ExternalDataSources/OMIM/2021-06-14/MimToGeneSymbol.tsv --json ExternalDataSources/OMIM/2021-06-14/MimEntries.json.gz --out SupplementaryDatabase/63/
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------


Time: 00:00:04.5
- - - - \ No newline at end of file diff --git a/3.16/data-sources/phylop-json/index.html b/3.16/data-sources/phylop-json/index.html deleted file mode 100644 index 14baebe0..00000000 --- a/3.16/data-sources/phylop-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -phylop-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

phylop-json

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"phylopScore":0.459
}
]
FieldTypeNotes
phylopScorefloatrange: -14.08 to 6.424
- - - - \ No newline at end of file diff --git a/3.16/data-sources/phylop/index.html b/3.16/data-sources/phylop/index.html deleted file mode 100644 index e430574f..00000000 --- a/3.16/data-sources/phylop/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -PhyloP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

PhyloP

Overview

PhyloP (phylogenetic p-values) conservation scores are obtained from the [PHAST package](http://compgen.bscb.cornell.edu/phast/) for multiple alignments of vertebrate genomes to the human genome. For GRCh38, the multiple alignments are against 19 mammals, and for GRCh37, they are against 45 vertebrate genomes.

Publication

Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. Genome Res. 2005 Aug;15(8):1034-50. (http://www.genome.org/cgi/doi/10.1101/gr.3715005)

WigFix File

The data is provided in WigFix files, which are text files that provide conservation scores for contiguous intervals in the following format:

fixedStep chrom=chr1 start=10918 step=1
0.064
0.058
0.064
0.058
0.064
0.064
fixedStep chrom=chr1 start=34045 step=1
0.111
0.100
0.111
0.111
0.100
0.111
0.111
0.111
0.100
0.111
-1.636

We convert them to binary files with indexes for fast query. Note that these are scores for genomic positions and are reported only for SNVs.

Download URL

GRCh37: http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/

GRCh38: http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/

JSON Output

Unlike other supplementary data sources, phyloP scores are reported in the variants section.

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"phylopScore":0.459
}
]
FieldTypeNotes
phylopScorefloatrange: -14.08 to 6.424
- - - - \ No newline at end of file diff --git a/3.16/data-sources/primate-ai-json/index.html b/3.16/data-sources/primate-ai-json/index.html deleted file mode 100644 index b06e3780..00000000 --- a/3.16/data-sources/primate-ai-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -primate-ai-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

primate-ai-json

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.16/data-sources/primate-ai/index.html b/3.16/data-sources/primate-ai/index.html deleted file mode 100644 index f461c7b3..00000000 --- a/3.16/data-sources/primate-ai/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Primate AI | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Primate AI

Overview

Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations. The method is described in the publication:

Publication

Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. Nat Genet 50, 1161–1170 (2018). https://doi.org/10.1038/s41588-018-0167-z

TSV File

Example

chr pos ref alt refAA   altAA   strand_1pos_0neg    trinucleotide_context   UCSC_gene   ExAC_coverage   primateDL_score
chr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239
chr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546

Parsing

From the TSV file, we're mainly interested in the following columns:

  • chr
  • pos
  • ref
  • alt
  • primateDL_score

We also use UCSC_gene to filter out variants that don't have matching gene models in Nirvana.

Pre-processing

Converting UCSC IDs

Primate AI only provides UCSC IDs. As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs.

The following queries are used to download the conversions from UCSC:

mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \
-e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv

mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \
-e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \
hg19 > ucsc_ensembl.tsv

Running the Pre-Processor

The Primate AI pre-processor can be run as follows:

dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \
ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz

During conversion, 0.5% of the UCSC IDs cannot be converted to either Entrez or Ensembl gene IDs. Once the gene IDs have been acquired, we check to see which are available in Nirvana.

The following Entrez Gene IDs were not found:

399753
401980
504189
504191
100293534

Here is the output from the pre-processor:

- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.
- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.
- loading UGA gene ID to gene dictionary... 103,277 genes loaded.
- parsing Primate AI variants... 70,121,953 variants parsed.

# variants with unknown gene ID: 27,253 / 70,121,953
# genes with unknown gene ID: 109 / 19,614

# variants not in UGA: 2,036 / 70,121,953
# genes not in UGA: 6 / 19,614

Known Issues

Known Issues

The Primate AI data set provides raw scores, but the scores are biased according to gene context. For example, a score of 0.4 means something different in TP53 than it does in KRAS.

As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. According to their research, the 25th percentile is a good proxy for benign variants and the 75th percentile is a good proxy for pathogenic variants.

Download URL

https://basespace.illumina.com/s/cPgCSmecvhb4

JSON Output

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.16/data-sources/revel-json/index.html b/3.16/data-sources/revel-json/index.html deleted file mode 100644 index 98fa4e03..00000000 --- a/3.16/data-sources/revel-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -revel-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

revel-json

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.16/data-sources/revel/index.html b/3.16/data-sources/revel/index.html deleted file mode 100644 index c0aa9d51..00000000 --- a/3.16/data-sources/revel/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -REVEL | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

REVEL

Overview

REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons.

Publication

Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. The American Journal of Human Genetics 99, 877-885 (2016). https://doi.org/10.1016/j.ajhg.2016.08.016

CSV File

Example

chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL
1,35142,35142,G,A,T,M,0.027
1,35142,35142,G,C,T,R,0.035
1,35142,35142,G,T,T,K,0.043
1,35143,35143,T,A,T,S,0.018
1,35143,35143,T,C,T,A,0.034

Parsing

From the CSV file, we're mainly interested in the following columns:

  • chr
  • hg19_pos
  • grch38_pos
  • ref
  • alt
  • REVEL

Known Issues

Sorting

Since the input file contains positions for both GRCh37 and GRCh38, we split it into two TSV files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file.

Conflicting Scores

When there are multiple scores available for the same variant (i.e. the same position with the same alternative allele), we pick the highest score.

Download URL

https://sites.google.com/site/revelgenomics/downloads

JSON Output

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.16/data-sources/splice-ai-json/index.html b/3.16/data-sources/splice-ai-json/index.html deleted file mode 100644 index 1bf41881..00000000 --- a/3.16/data-sources/splice-ai-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -splice-ai-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

splice-ai-json

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place
- - - - \ No newline at end of file diff --git a/3.16/data-sources/splice-ai/index.html b/3.16/data-sources/splice-ai/index.html deleted file mode 100644 index 9f48ca81..00000000 --- a/3.16/data-sources/splice-ai/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Splice AI | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Splice AI

Overview

SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence.

Publication

K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. Cell, 176 (3) (2019), pp. 535-548 e24

VCF File

Example

##fileformat=VCFv4.0
##assembly=GRCh37/hg19
##INFO=<ID=SYMBOL,Number=1,Type=String,Description="HGNC gene symbol">
##INFO=<ID=STRAND,Number=1,Type=String,Description="+ or - depending on whether the gene lies in the positive or negative strand">
##INFO=<ID=TYPE,Number=1,Type=String,Description="E or I depending on whether the variant position is exonic or intronic (GENCODE V24lift37 canonical annotation)">
##INFO=<ID=DIST,Number=1,Type=Integer,Description="Distance between the variant position and the closest splice site (GENCODE V24lift37 canonical annotation)">
##INFO=<ID=DS_AG,Number=1,Type=Float,Description="Delta score (acceptor gain)">
##INFO=<ID=DS_AL,Number=1,Type=Float,Description="Delta score (acceptor loss)">
##INFO=<ID=DS_DG,Number=1,Type=Float,Description="Delta score (donor gain)">
##INFO=<ID=DS_DL,Number=1,Type=Float,Description="Delta score (donor loss)">
##INFO=<ID=DP_AG,Number=1,Type=Integer,Description="Delta position (acceptor gain) relative to the variant position">
##INFO=<ID=DP_AL,Number=1,Type=Integer,Description="Delta position (acceptor loss) relative to the variant position">
##INFO=<ID=DP_DG,Number=1,Type=Integer,Description="Delta position (donor gain) relative to the variant position">
##INFO=<ID=DP_DL,Number=1,Type=Integer,Description="Delta position (donor loss) relative to the variant position">
#CHROM POS ID REF ALT QUAL FILTER INFO
10 92946 . C T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35
10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1
10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21
10 92947 . A C . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34
10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34
10 92947 . A G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32

Parsing

From the VCF file, we're mainly interested in the following INFO fields:

  • DS_AG - Δ score (acceptor gain)
  • DS_AL - Δ score (acceptor loss)
  • DS_DG - Δ score (donor gain)
  • DS_DL - Δ score (donor loss)
  • DP_AG - Δ position (acceptor gain) relative to the variant position
  • DP_AL - Δ position (acceptor loss) relative to the variant position
  • DP_DG - Δ position (donor gain) relative to the variant position
  • DP_DL - Δ position (donor loss) relative to the variant position

The Splice AI team suggests the following interpretation for the scores:

Range | Confidence | Pathogenicity
0 ≤ x < 0.1 | low | likely benign
0.1 ≤ x ≤ 0.5 | medium | likely pathogenic
x > 0.5 | high | pathogenic

Pre-processing

Filtering

Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value (e.g. low splice scores observed in intergenic regions). Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed.

As a result, Nirvana filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism.

Download URL

https://basespace.illumina.com/s/5u6ThOblecrh

JSON Output

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place
- - - - \ No newline at end of file diff --git a/3.16/data-sources/topmed-json/index.html b/3.16/data-sources/topmed-json/index.html deleted file mode 100644 index f0218ba8..00000000 --- a/3.16/data-sources/topmed-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -topmed-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

topmed-json

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.16/data-sources/topmed/index.html b/3.16/data-sources/topmed/index.html deleted file mode 100644 index ceb21587..00000000 --- a/3.16/data-sources/topmed/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -TOPMed | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

TOPMed

Overview

The Trans-Omics for Precision Medicine (TOPMed) program, sponsored by the National Institutes of Health (NIH) National Heart, Lung and Blood Institute (NHLBI), is part of a broader Precision Medicine Initiative, which aims to provide disease treatments tailored to an individual’s unique genes and environment. TOPMed contributes to this Initiative through the integration of whole-genome sequencing (WGS) and other omics (e.g., metabolic profiles, epigenomics, protein and RNA expression patterns) data with molecular, behavioral, imaging, environmental, and clinical data.

Publication

Kowalski, M.H., Qian, H., Hou, Z., Rosen, J.D., Tapia, A.L., Shan, Y., Jain, D., Argos, M., Arnett, D.K., Avery, C. and Barnes, K.C., 2019. Use of >100,000 NHLBI Trans-Omics for Precision Medicine (TOPMed) Consortium whole genome sequences improves imputation quality and detection of rare variant associations in admixed African and Hispanic/Latino populations. PLoS genetics, 15(12), p.e1008500.

VCF extraction

We currently extract the following fields from the TOPMed VCF file:

##INFO=<ID=AN,Number=1,Type=Integer,Description="Number of Alleles in Samples with Coverage">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts in Samples with Coverage">
##INFO=<ID=AF,Number=A,Type=Float,Description="Alternate Allele Frequencies">
##INFO=<ID=Het,Number=A,Type=Integer,Description="Number of samples with heterozygous genotype calls">
##INFO=<ID=Hom,Number=A,Type=Integer,Description="Number of samples with homozygous alternate genotype calls">

Example:

chr1    10132   TOPMed_freeze_5?chr1:10,132     T       C       255     SVM     VRT=1;NS=62784;AN=125568;AC=32;AF=0.000254842;Het=32;Hom=0      NA:FRQ  125568:0.000254842

GRCh37 liftover

The data is not available for GRCh37 on the TOPMed website. We performed a liftover from GRCh38 to GRCh37 using dbSNP IDs.

Download URL

https://bravo.sph.umich.edu/freeze5/hg38/download

JSON output

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.16/file-formats/custom-annotations/index.html b/3.16/file-formats/custom-annotations/index.html deleted file mode 100644 index 9e36c4ed..00000000 --- a/3.16/file-formats/custom-annotations/index.html +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - -Custom Annotations | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Custom Annotations

Overview

While the team tries to keep data sources up-to-date, you might want to start incorporating new annotations ahead of our update cycle. Another -common use case involves protected health information (PHI). Custom annotations are a mechanism that enables both use cases.

Here are some examples of how our collaborators use custom annotations:

  • associating context from both a patient-level and a patient cohort level with the variant annotations
  • adding content that is licensed (e.g. HGMD) to the variant annotations

At the moment, we have two different custom annotation file formats. One provides additional annotations to variants (both small variants and SVs) -while the other caters to gene annotations.

In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data.

The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how -Nirvana should match the variants.

At Illumina, there are usually many components downstream of Nirvana that have to parse our annotations. If a customer provides a custom -annotation, those downstream tools need to understand more about the data such as:

  • data type (e.g. number, boolean, or a string)
  • data category (e.g. is this an allele count, allele number, allele frequency, etc.)
  • associated population (i.e. if this is an allele frequency)

For each custom annotation, Nirvana uses this context to create a JSON schema that can be sent to downstream tools. If -a tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of [0, 1].

Variant File Format

Basic Allele Frequency Example

Create the Custom Annotation TSV

Imagine that you want to create a basic allele frequency custom annotation for small variants. If we visualized the tab-delimited file -(TSV), it would look something like this:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTallAf
#categories...AlleleFrequency
#descriptions...ALL
#type...number
chr1623603511TGAT0.000006579
chr1668801894GA0.000006569
chr1911107436GA0.00003291

Here's the full TSV file.

Let's go over the header and discuss the contents:

  • title indicates the name of the JSON key
  • assembly indicates that this data is only valid for GRCh38
  • matchVariantsBy indicates that we should only match the annotations if they are allele-specific
  • categories provides hints to downstream tools on how they might want to treat the data. In this case, we indicate that it's an allele -frequency.
  • descriptions are used in special circumstances to provide more context. Even though column 5 is called allAf, it might not be clear to a -downstream tool that this means a global allele frequency using all sub-populations. In this case, ALL indicates the intended population.
  • type indicates to downstream tools the data type. Since allele frequencies are numbers, we'll write number in this column.
Reference Base Checking

Nirvana validates all the reference bases in a custom annotation. If a variant or genomic region is specified that has the wrong reference base, an error will be produced.

Sorting

The variants within each chromosome must be sorted by genomic position.

Convert to Nirvana Format

First we need to convert the TSV file to Nirvana's native file format and let's put that file in a new directory called CA:

$ mkdir CA
$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \
-r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA
---------------------------------------------------------------------------
SAUtils (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Chromosome 16 completed in 00:00:00.1
Chromosome 19 completed in 00:00:00.0

Time: 00:00:00.2

Annotate with Nirvana

Let's annotate the following VCF (notice that it's one of the variants that we have in our custom annotation):

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 68801894 . G A . . .

Here's the full VCF file.

Since Nirvana can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to -the normal Nirvana command-line.

$ dotnet bin/Release/netcoreapp2.1/Nirvana.dll -c Data/Cache/GRCh38/Both \
-r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \
--sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA
---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:01.8
SA Position Scan 00:00:00.0 19

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
chr16 00:00:00.2 00:00:01.3 1

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:01.9 25.5 %
Preload 00:00:00.2 3.3 %
Annotation 00:00:01.3 18.2 %

Time: 00:00:06.3

Investigate the Results

We would expect the following data to show up in our JSON output file:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": {
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06
},
"clinvar": [

Here's the full JSON file.

Nirvana preserves up to 6 decimal places for allele frequency data.

Categories & Descriptions Example

Create the Custom Annotation TSV

Building on the previous example, we can add other types of annotations like predictions and general notes.

Col 1Col 2Col 3Col 4Col 5Col 6Col 7
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTallAfpathogenicitynotes
#categories...AlleleFrequencyPrediction.
#descriptions...ALL..
#type...numberstringstring
chr1623603511TGAT0.000006579P.
chr1668801894GA0.000006569LPSeen in case 123
chr1911107436GA0.00003291..

Here's the full TSV file.

Placeholders

You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). While -Nirvana also accepts empty columns in the TSV file, we use periods in these examples to promote readability.

Let's go over what's new in this example:

  • Column 6 adds a field called pathogenicity which uses the Prediction category. When using this category, Nirvana will -validate to make -sure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic).
  • Column 7 adds a field called notes and it doesn't have a category or description. We're just going to use it to add some internal -notes.

Annotate with Nirvana

Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the -alternate allele (allele-specific match):

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 23603511 . TG T . . .
16 68801894 . G A . . .
19 11107436 . G C . . .

Here's the full VCF file.

Investigate the Results

Because we specified #matchVariantsBy=allele in our custom annotation file, only the middle variant will get an annotation:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": {
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06,
"pathogenicity": "LP",
"notes": "Seen in case 123"
},
"clinvar": [

Here's the full JSON file.

Using Positional Matches

What would happen if we changed to #matchVariantsBy=position? Two things will happen. First, our positional variants will now match:

      "variants": [
{
"vid": "16-23603511-TG-T",
"chromosome": "16",
"begin": 23603512,
"end": 23603512,
"refAllele": "G",
"altAllele": "-",
"variantType": "deletion",
"hgvsg": "NC_000016.10:g.23603512delG",
"MyDataSource": [
{
"refAllele": "GA",
"altAllele": "-",
"allAf": 7e-06,
"pathogenicity": "P"
}
],
"clinvar": [

In addition, you will now see an extra flag for our allele-specific variant:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": [
{
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06,
"pathogenicity": "LP",
"notes": "Seen in case 123",
"isAlleleSpecific": true
}
],
"clinvar": [

Genomic Region Example

Create the Custom Annotation TSV

In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. Consider the following example:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFENDnotes
#categories....
#descriptions....
#type...string
chr1620000000T70000000Lots of false positives in this region

Here's the full TSV file.

Let's go over what's new in this example:

  • Column 5 now has a field called notes. In essence, it looks exactly like column 7 from our previous example.
  • The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.

In the previous example we learned about positional matching vs allele-specific matching. For genomic regions, #matchVariantsBy=allele and #matchVariantsBy=position produce -the same result.

Annotate with Nirvana

Let's use the same VCF file as our previous example.

Investigate the Results

    {
"chromosome": "16",
"position": 23603511,
"refAllele": "TG",
"altAlleles": [
"T"
],
"cytogeneticBand": "16p12.2",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0,
"annotationOverlap": 0
}
],
"variants": [

Here's the full JSON file.

Reciprocal & Annotation Overlap

For all intervals, Nirvana internally calculates two overlaps: a variant overlap and an annotation overlap. Variant overlap is the percentage of the variant's length that is -overlapped. Annotation overlap is the percentage of the annotation's length that is overlapped.

Reciprocal overlap is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is 1 bp, both overlaps will be pretty close to 0.

We will also see this annotation for the other variant on chr16:

    {
"chromosome": "16",
"position": 68801894,
"refAllele": "G",
"altAlleles": [
"A"
],
"cytogeneticBand": "16q22.1",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0,
"annotationOverlap": 0
}
],
"variants": [
Targeting Structural Variants

Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To -force Nirvana to match regions only to other SVs, use the #matchVariantsBy=sv option in the header.

Mixing Small Variants and Genomic Regions

Create the Custom Annotation TSV

Previously we looked at examples that either had small variants or genomic regions. Let's create a file that contains both:

Col 1Col 2Col 3Col 4Col 5Col 6
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTENDnotes
#categories.....
#descriptions.....
#type....string
chr1623603511TGAT..
chr1668801894GA..
chr1911107436GA..
chr2110510818C.10699435Interval #1
chr2110510818C<DEL>10699435Interval #2
chr2212370388TT[chr22:12370729[.Known false-positive

Here's the full TSV file.

Let's go over what's new in this example:

  • Column 4 now has the ALT field. Except for the case listed below, this is only used by small variants or translocation breakends.
  • Column 5 now has the END field. This is only used by genomic regions.
  • There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? Interval #2 has a symbolic allele in the ALT column. When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). When Nirvana matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.

Annotate with Nirvana

Let's use a new VCF file to study how matching works for intervals #1 and #2:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
21 10510818 . C <DUP> . . END=10699435;SVTYPE=DUP
22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND

Here's the full VCF file.

The first variant is similar to the custom annotation labelled "interval #2". Position 10510818 is the padding base, so it effectively starts at position 10510819.

Investigate the Results

  "positions": [
{
"chromosome": "21",
"position": 10510818,
"svEnd": 10699435,
"refAllele": "C",
"altAlleles": [
"<DUP>"
],
"cytogeneticBand": "21p11.2",
"MyDataSource": [
{
"start": 10510818,
"end": 10699435,
"notes": "Interval #1",
"reciprocalOverlap": 0.99999,
"annotationOverlap": 0.99999
},
{
"start": 10510819,
"end": 10699435,
"notes": "Interval #2",
"reciprocalOverlap": 1,
"annotationOverlap": 1
}
],

Here's the full JSON file.

As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. Interval #1 technically starts 1 bp earlier, so its overlap is 99.999%.

Further down the JSON file, we find the annotated translocation breakend:

      "variants": [
{
"vid": "22-12370388-T-T[chr22:12370729[",
"chromosome": "22",
"begin": 12370388,
"end": 12370388,
"isStructuralVariant": true,
"refAllele": "T",
"altAllele": "T[chr22:12370729[",
"variantType": "translocation_breakend",
"MyDataSource": {
"refAllele": "T",
"altAllele": "T[chr22:12370729[",
"notes": "Known false-positive"
}
}

Gene File Format

Basic Gene Example

Create the Custom Annotation TSV

Previously we looked at examples that either had small variants or genomic regions, however, sometimes we would like to add custom gene annotations. The gene custom annotation file format -looks slightly different:

Col 1Col 2Col 3Col 4
#title=MyDataSource
#geneSymbolgeneIdphenotypenotes
#categories...
#descriptions...
#type.stringstring
TP537157Colorectal cancer, hereditary nonpolyposis, type 5.
KRASENSG00000133703Mismatch repair cancer syndromeSeen in cohort 123

Here's the full TSV file.

Let's go over what's in this example:

  • Column 2 has the geneId field. This can be either an Entrez Gene ID or an Ensembl ID.
Gene Symbols

Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Nirvana uses the geneId to match genes rather than the gene symbol. However, to -make the custom annotation files easier to read, we've included the geneSymbol column as well.

Unknown Gene IDs

When Nirvana parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Nirvana. In such a case, Nirvana will display an error showing all the -unrecognized gene IDs.

Annotate with Nirvana

Let's use a VCF file that contains variants in TP53 and KRAS:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
12 25227255 . A T . . .
17 7675074 . C A . . .

Here's the full VCF file.

Investigate the Results

  "genes": [
{
"name": "KRAS",
"clingenGeneValidity": [
{
"diseaseId": "MONDO_0009026",
"disease": "Costello syndrome",
"classification": "disputed",
"classificationDate": "2018-07-24"
}
],
"clingenDosageSensitivityMap": {
"haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"
},
"gnomAD": {
"pLi": 0.000788,
"pRec": 0.789,
"pNull": 0.21,
"synZ": 0.336,
"misZ": 2.32,
"loeuf": 1.24
},
"MyDataSource": {
"phenotype": "Mismatch repair cancer syndrome",
"notes": "Seen in cohort 123"
}
},

This is the abbreviated output for KRAS. Here's the full JSON file if you want to see the complete KRAS entry.

Customizing the Header

Title

For the title, you can provide any string that hasn't already been used. The title should be unique.

caution

Make sure that the title does not conflict with other keys in the JSON file.

For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be -vid, chromosome, transcripts, etc. The title should also not conflict with other data source keys like clinvar or gnomad.

For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be -chromosome, svLength, cytogeneticBand, etc. The title should also not conflict with other data source keys like clingen or dgv.

caution

Care should be taken not to annotate using multiple custom annotations that all use the same title.

Genome Assemblies

The following genome assemblies can be specified:

  • GRCh37
  • GRCh38

Matching Criteria

The matching criteria instruct Nirvana how to match a VCF variant to the custom annotation.

The following matching criteria can be specified:

  • allele - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like gnomAD
  • position - use this when you want positional matches. This is commonly used with disease phenotype data sources like ClinVar
  • sv - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline -copy number intervals along the genome.

Categories

Categories are not used by Nirvana, but are often used by downstream tools. Categories provide hints for how those tools should filter or display -the annotation data.

When a category is specified, Nirvana will provide additional validation for those fields. The following table describes each category:

CategoryDescriptionValidation
AlleleCountallele counts for a specific populationSee the supported populations below
AlleleNumberallele numbers for a specific populationSee the supported populations below
AlleleFrequencyallele frequencies for a specific populationSee the supported populations below
PredictionACMG-style pathogenicity classificationsbenign (B)
likely benign (LB)
VUS
likely pathogenic (LP)
pathogenic (P)
Filterfree text that signals downstream tools to add the column to the filterMax 20 characters
Descriptionfree-text descriptionMax 100 characters
Identifierany IDMax 50 characters
HomozygousCountcount of homozygous individuals for a specific populationSee the supported populations below
Scoreany score valueAny double-precision floating point number

Descriptions

Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations.

Populations

The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD.

Population CodeSuper-population CodeDescription
ACBAFRAfrican Caribbeans in Barbados
AFRAFRAfrican
ALLALLAll populations
AMRAMRAd Mixed American
ASJAshkenazi Jewish
ASWAFRAmericans of African Ancestry in SW USA
BEBSASBengali from Bangladesh
CDXEASChinese Dai in Xishuangbanna, China
CEUEURUtah Residents (CEPH) with Northern and Western European Ancestry
CHBEASHan Chinese in Beijing, China
CHSEASSouthern Han Chinese
CLMAMRColombians from Medellin, Colombia
EASEASEast Asian
ESNAFREsan in Nigeria
EUREUREuropean
FINEURFinnish in Finland
GBREURBritish in England and Scotland
GIHSASGujarati Indian from Houston, Texas
GWDAFRGambian in Western Divisions in the Gambia
IBSEURIberian population in Spain
ITUSASIndian Telugu from the UK
JPTEASJapanese in Tokyo, Japan
KHVEASKinh in Ho Chi Minh City, Vietnam
LWKAFRLuhya in Webuye, Kenya
MAGAFRMandinka in the Gambia
MKKAFRMaasai in Kinyawa, Kenya
MSLAFRMende in Sierra Leone
MXLAMRMexican Ancestry from Los Angeles, USA
NFEEUREuropean (Non-Finnish)
OTHOTHOther
PELAMRPeruvians from Lima, Peru
PJLSASPunjabi from Lahore, Pakistan
PURAMRPuerto Ricans from Puerto Rico
SASSASSouth Asian
STUSASSri Lankan Tamil from the UK
TSIEURToscani in Italia
YRIAFRYoruba in Ibadan, Nigeria

Data Types

Each custom annotation can be one of the following data types:

  • bool - true or false
  • number - any integer or floating-point number
  • string - text
tip

For boolean variables, only keys with a true value will be output to the JSON object.

Using SAUtils

Nirvana includes a tool called SAUtils that converts various data sources into Nirvana's native binary format. The sub-commands customvar and customgene are used to specify a variant file or a gene file respectively.

Convert Variant File

dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i MyDataSource.tsv \
-o SupplementaryAnnotation
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input TSV path
  • the -o argument specifies the output directory

Convert Gene File

dotnet bin/Release/netcoreapp2.1/SAUtils.dll customgene \
--uga Nirvana_UGA.tsv \
-i MyDataSource.tsv \
-o SupplementaryAnnotation
  • the --uga argument specifies the Nirvana universal gene archive (UGA) path
  • the -i argument specifies the input TSV path
  • the -o argument specifies the output directory
- - - - \ No newline at end of file diff --git a/3.16/file-formats/nirvana-json-file-format/index.html b/3.16/file-formats/nirvana-json-file-format/index.html deleted file mode 100644 index a6da1632..00000000 --- a/3.16/file-formats/nirvana-json-file-format/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Nirvana JSON File Format | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Nirvana JSON File Format

Overview

Conventions

In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. As such, we have several conventions that are useful to know about:

  • With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display "isStructuralVariant":false a few million times when annotating a small variant VCF.
  • When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. Nirvana treats periods like empty or null strings and therefore will not output those entries.

JSON Layout

info

In general, each position corresponds to a row in the original VCF file.

For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section.

Parsing

info

We've put together a new section that discusses how to parse our JSON files easily using examples in a Python Jupyter notebook and an R version as well. In addition, we have information about how to quickly dump content from our JSON file using a tabix-like utility called JASIX.

{
"header":{
"annotator":"Nirvana 3.0.0-alpha.5+g6c52e247",
"creationTime":"2017-06-14 15:53:13",
"genomeAssembly":"GRCh37",
"dataSources":[
{
"name":"OMIM",
"version":"unknown",
"description":"An Online Catalog of Human Genes and Genetic Disorders",
"releaseDate":"2017-05-03"
},
{
"name":"VEP",
"version":"84",
"description":"BothRefSeqAndEnsembl",
"releaseDate":"2017-01-16"
},
{
"name":"ClinVar",
"version":"20170503",
"description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",
"releaseDate":"2017-05-03"
},
{
"name":"phyloP",
"version":"hg19",
"description":"46 way conservation score between humans and 45 other vertebrates",
"releaseDate":"2009-11-10"
}
],
"samples":[
"NA12878",
"NA12891",
"NA12892"
]
},
FieldTypeNotes
annotatorstringthe name of the annotator and the current version
creationTimestringyyyy-MM-dd hh:mm:ss
genomeAssemblystringsee possible values below
schemaVersionintegerincremented whenever the core structure of the JSON file introduces breaking changes
dataVersionstring
dataSourcesobject arraysee Data Source entry below
samplesstring arraythe order of these sample names will be used throughout the JSON file when enumerating samples

Data Source

FieldTypeNotes
namestring
versionstring
descriptionstringoptional description of the data source
releaseDatestringyyyy-MM-dd

Genome Assemblies

  • GRCh37
  • GRCh38
  • hg19
  • SARSCoV2

Positions

"positions":[
{
"chromosome":"chr2",
"position":48010488,
"repeatUnit":"GGCCCC",
"refRepeatCount":3,
"svEnd":48020488,
"refAllele":"G",
"altAlleles":[
"A",
"GT"
],
"quality":461,
"filters":[
"PASS"
],
"ciPos":[
-170,
170
],
"ciEnd":[
-175,
175
],
"svLength":1000,
"strandBias":1.23,
"jointSomaticNormalQuality":29,
"cytogeneticBand":"2p16.3",
FieldTypeVariant TypeNotes
chromosomestringallexactly as displayed in the vcf
positionintegerallexactly as displayed in the vcf (1-based notation). Range: 1 - 250 million
repeatUnitstringSTRprovided by ExpansionHunter
refRepeatCountintegerSTRprovided by ExpansionHunter
svEndintegerSV
refAllelestringallexactly as displayed in the vcf
altAllelesstring arrayallexactly as displayed in the vcf
qualityfloatallexactly as displayed in the vcf (Normally an integer, but some variant callers use floating point. Has been observed as high as 500k)
filtersstring arrayallexactly as displayed in the vcf
ciPosinteger arraySV
ciEndinteger arraySV
svLengthintegerSV
strandBiasfloatsmall variantprovided by GATK (from SB)
jointSomaticNormalQualityintegerSVprovided by the Manta variant caller (SOMATICSCORE)
cytogeneticBandstringalle.g. 17p13.1

ClinGen

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
FieldTypeNotes
clingenobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
variantTypestringAny of the sequence alterations defined here.
idstringIdentifier from the data source. Alternatively a VID
clinicalInterpretationstringsee possible values below
observedGainsintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
observedLossesintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
validatedboolean
phenotypesstring arrayDescription of the phenotype.
phenotypeIdsstring arrayDescription of the phenotype IDs.
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain
"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
FieldTypeNotes
clingenDosageSensitivityMapobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
haploinsufficiencystringsee possible values below
triplosensitivitystring(same as haploinsufficiency) 
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).
annotationOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap. Specified up to 5 decimal places (Not reported for Insertions).

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely

1000 Genomes (SV)

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.

MITOMAP (SV)

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring array
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places

Samples

"samples":[
{
"genotype":"0/1",
"variantFrequencies":[
0.333,
0.5
],
"totalDepth":57,
"genotypeQuality":12,
"copyNumber":3,
"repeatUnitCounts":[
10,
20
],
"alleleDepths":[
10,
20,
30
],
"failedFilter":true,
"splitReadCounts":[
10,
20
],
"pairedEndReadCounts":[
10,
20
],
"isDeNovo":true,
"diseaseAffectedStatuses":[
"-"
],
"artifactAdjustedQualityScore":89.3,
"likelihoodRatioQualityScore":78.2,
"heteroplasmyPercentile":[
23.13,
12.65
]
}
]
FieldTypeNotes
genotypestring
variantFrequenciesfloat arrayrange: 0 - 1.0. One value per alternate allele
totalDepthintegernon-negative integer values
genotypeQualityintegernon-negative integer values. Typically maxes out at 99
copyNumberintegernon-negative integer values
repeatUnitCountsinteger arrayExpansionHunter-specific
alleleDepthsinteger arraynon-negative integer values
failedFilterbool
splitReadCountsinteger arrayManta-specific
pairedEndReadCountsinteger arrayManta-specific
isDeNovobool
diseaseAffectedStatusesstring arrayExpansionHunter-specific
artifactAdjustedQualityScorefloatPEPE-specific. Range: 0 - 100.0
likelihoodRatioQualityScorefloatPEPE-specific. Range: 0 - 100.0
heteroplasmyPercentilefloat arrayrange: 0 - 100. 2 decimal places. One value per alternate allele
Empty Samples

If a sample does not contain any entries, we will create a sample object that contains the isEmpty key. This ensures that sample ordering is preserved while indicating that a sample is intentionally empty.

"samples":[
{
"isEmpty":true
}
],

Variants

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"isReferenceMinorAllele":true,
"isStructuralVariant":true,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"isDecomposedVariant":true,
"isRecomposedVariant":true,
"linkedVids":["2:48010488:GTA:ATC"],
"hgvsg":"NC_000002.11:g.48010488G>A",
"phylopScore":0.459
FieldTypeNotes
vidstringsee Variant Identifiers
chromosomestring
beginint1-based non-negative integer values. Range: 1 - 250 million
endint1-based non-negative integer values. Range: 1 - 250 million
isReferenceMinorAllelebooltrue when this is a reference minor allele
isStructuralVariantbooltrue when the variant is a structural variant
inLowComplexityRegionbooltrue when the variant lies in a low complexity region (gnomAD low complexity regions)
refAllelestringparsimonious representation of the reference allele
altAllelestringparsimonious representation of the alternate allele.
variantTypestringuses Sequence Ontology sequence alterations
isDecomposedVariantbooltrue when the decomposed variant has been used to create another recomposed variant
isRecomposedVariantbooltrue when the variant is recomposed from two or more decomposed variants
linkedVidsstring arraylist of VIDs for variants connecting decomposed and recomposed variants
hgvsgstringHGVS g. notation
phylopScorefloatphyloP conservation score. Range: -14.08 to 6.424
Reference Minor Alleles

Nirvana supports annotating reference minor alleles. In such a case, refAllele will be replaced by the global major allele and altAllele will be replaced with the original reference allele.

Flagging Decomposed & Recomposed Variants

When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with "isDecomposedVariant":true.

Similarly, the recomposed variant will be shown as a new VCF position. This recomposed variant will be flagged with "isRecomposedVariant":true.

Transcripts

"transcripts":[
{
"transcript":"ENST00000445503.1",
"source":"Ensembl",
"bioType":"nonsense_mediated_decay",
"codons":"gGg/gAg",
"aminoAcids":"G/E",
"cdnaPos":"268",
"cdsPos":"116",
"exons":"1/9",
"introns":"1/8",
"proteinPos":"39",
"geneId":"ENSG00000116062",
"hgnc":"MSH6",
"consequence":[
"missense_variant",
"NMD_transcript_variant"
],
"hgvsc":"ENST00000445503.1:c.116G>A",
"hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",
"geneFusion":{
"exon":6,
"intron":5,
"fusions":[
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",
"exon":3,
"intron":2
},
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",
"exon":2,
"intron":1
}
]
},
"isCanonical":true,
"polyPhenScore":0.95,
"polyPhenPrediction":"probably damaging",
"proteinId":"ENSP00000405294.1",
"siftScore":0.61,
"siftPrediction":"tolerated",
"completeOverlap":true
}
]
FieldTypeNotes
transcriptstringtranscript ID. e.g. ENST00000445503.1
sourcestringRefSeq / Ensembl
bioTypestringdescriptions of the biotypes from Ensembl
codonsstring
aminoAcidsstring
cdnaPosstring
cdsPosstring
exonsstringexons affected by the variant
intronsstringintrons affected by the variant
proteinPosstring
geneIdstringgene ID. e.g. ENSG00000116062
hgncstringgene symbol. e.g. MSH6
consequencestring arraySequence Ontology Consequences
hgvscstringHGVS coding nomenclature
hgvspstringHGVS protein nomenclature
geneFusionobjectsee Gene Fusions entry below
isCanonicalbooltrue when this is a canonical transcript
polyPhenScorefloatrange: 0 - 1.0
polyPhenPredictionstringsee possible values below
proteinIdstringprotein ID. E.g. ENSP00000405294.1
siftScorefloatrange: 0 - 1.0
siftPredictionstringsee possible values below
completeOverlapbooltrue when this transcript is completely overlapped by the variant

PolyPhen

  • probably damaging
  • possibly damaging
  • benign
  • unknown

SIFT

  • tolerated
  • deleterious
  • tolerated - low confidence
  • deleterious - low confidence

Amino Acid Conservation

"aminoAcidConservation": {
"scores": [0.34]
}
FieldTypeNotes
aminoAcidConservationobject
scoresobject array of doublespercent conserved with respect to human amino acid residue. Range: 0.01 - 1.00

Gene Fusions

FieldTypeNotes
exonintactual exon where the breakpoint was located
intronintactual intron where the breakpoint was located
fusionsobject arraysee Fusion entry below

Fusion

FieldTypeNotes
exonintactual exon where the other breakpoint was located
intronintactual intron where the other breakpoint was located
hgvscstringHGVS coding nomenclature describing the two genes and the transcripts that are fused along with their coding positions

Regulatory Regions

"regulatoryRegions":[
{
"id":"ENSR00001542175",
"type":"promoter",
"consequence":[
"regulatory_region_variant"
]
}
]
FieldTypeNotes
idstring
typestringsee possible values below
consequencestring arraysee possible values below

Regulatory Types

  • CTCF_binding_site
  • enhancer
  • open_chromatin_region
  • promoter
  • promoter_flanking_region
  • TF_binding_site

Regulatory Consequences

  • regulatory_region_variant
  • regulatory_region_ablation
  • regulatory_region_amplification
  • regulatory_region_truncation

ClinVar

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity

1000 Genomes

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.

gnomAD

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.

dbSNP

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs

MITOMAP

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele

Primate AI

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0

REVEL

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0

Splice AI

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place

TOPMed

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters

Genes

"genes":[
{
"name":"MSH6",
"hgncId":7329,
"summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",
/* this is where gene-level data sources can be found e.g. OMIM */
}
]
FieldTypeNotes
namestringHGNC gene symbol
hgncIdintHGNC ID
summarystringshort description of the gene from OMIM

OMIM

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping

gnomAD LoF Gene Metrics

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)

ClinGen Disease Validity

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
FieldTypeNotes
clingenGeneValidityobject array
diseaseIdstringMonarch Disease Ontology ID (MONDO)
diseasestringdisease label
classificationstringsee below for possible values
classificationDatestringyyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
- - - - \ No newline at end of file diff --git a/3.16/index.html b/3.16/index.html deleted file mode 100644 index f5c7463e..00000000 --- a/3.16/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Introduction | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs, including CNVs). It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation.

The inputs to Nirvana are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Nirvana handles multiple alternate alleles and multiple samples with ease.

The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily.

Fun Fact

Nirvana is a backronym for NImble and Robust VAriant aNnotAtor

What does Nirvana annotate?

We use Sequence Ontology consequences to describe how each variant impacts a given transcript:

In addition, we also use external data sources to provide additional context for each variant:

Licensing

Code

Nirvana source code is provided under the GPLv3 license. Nirvana includes several third party packages provided under other open source licenses; please see Dependencies for additional details.

Data

The data used by Nirvana is publicly available; however, some data sources have special restrictions on use by non-academic entities.

Nirvana Team

Active Team

The Nirvana team works on the core functionality and the AWS annotation services, in addition to keeping the annotation data sources up-to-date.

Current members of the Nirvana team are listed in alphabetical order below.

Joseph Platzer

Test Lead. Joins Nirvana with a history of building sequencing tools and keeping the customer first.

Michael Strömberg

Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it.

Rajat Shuvro Roy

Lead developer. Loves to speed up things and make services available to all interested users.

Honorary Alumni

Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things.

Haochen Li

Detail-oriented quick thinker that keeps cool even in the most stressful situations. Now working as a Senior Bioinformatics Data Scientist at GRAIL.

Julien Lajugie

Julien is a legend around these parts. When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place.

Shuli Kang

Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies.

Yu Jiang

Biostatistics genius from Duke University before joining our team at Illumina. Now working as a Research Engineer at Facebook AI Research.
- - - - \ No newline at end of file diff --git a/3.16/introduction/covid19/index.html b/3.16/introduction/covid19/index.html deleted file mode 100644 index a7909a4b..00000000 --- a/3.16/introduction/covid19/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Annotating COVID-19 | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Annotating COVID-19

The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.

However, nothing in our architecture prevents us from supporting other genomes. Earlier this year, we had an opportunity to put that statement to the test - we added support for annotating the SARS-CoV-2 genome, the virus that causes the COVID-19 disease.

In addition to normal transcript annotation, we also supply:

  • allele frequencies
  • protein domains
SARS-CoV-2 Galaxy Project

The allele frequencies used by Nirvana were provided by the SARS-CoV-2 Galaxy Project. This is an international effort that provides ongoing analysis of COVID-19 using Galaxy, BioConda, and public research infrastructures.

Getting Nirvana

If you don't have Nirvana already, please consult our Getting Started page first.

Downloading the COVID-19 data files

Here's a data zip file containing new gene models, reference, and external data sources for SARS-CoV-2:

Just go to the directory that contains your Nirvana Data directory.

cd ~/Nirvana
curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip
unzip Covid19Data.zip

Download a COVID-19 VCF file

Here's a COVID-19 VCF file you can play around with:

curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz

Running Nirvana

Once you have downloaded the data sets, use the following command to annotate your VCF:

dotnet bin/Release/netcoreapp3.1/Nirvana.dll \
-c Data/Cache/SARS-CoV-2/SARS-CoV-2 \
--sd Data/SupplementaryAnnotation/SARS-CoV-2 \
-r Data/References/SARS-CoV-2.ASM985889v3.dat \
-i Covid19Mutations.vcf.gz \
-o Covid19Mutations
  • the -c argument specifies the cache prefix
  • the --sd argument specifies the supplementary annotation directory
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input VCF path
  • the -o argument specifies the output filename prefix

When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:

---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:00.0
SA Position Scan 00:00:00.0 1763

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
NC_045512 00:00:00.0 00:00:00.1 173

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:00.0 2.0 %
Preload 00:00:00.0 0.3 %
Annotation 00:00:00.1 6.0 %

Time: 00:00:01.5

The output will be a JSON file called Covid19Mutations.json.gz. Here's the full JSON file.

Investigating the Results

Here's an example of what a COVID-19 variant looks like in the JSON output:

{
"chromosome":"NC_045512.2",
"position":27323,
"refAllele":"C",
"altAlleles":[
"T"
],
"filters":[
"PASS"
],
"proteinDomains":[
{
"start":27202,
"end":27384,
"proteinId":"YP_009724394.1",
"domainId":"cl13556",
"domainName":"Sars6 super family",
"reciprocalOverlap":0.00546,
"annotationOverlap":0.00546
}
],
"variants":[
{
"vid":"NC_045512.2-27323-C-T",
"chromosome":"NC_045512.2",
"begin":27323,
"end":27323,
"refAllele":"C",
"altAllele":"T",
"variantType":"SNV",
"hgvsg":"NC_045512.2:g.27323C>T",
"alleleFrequency":{
"refAllele":"C",
"altAllele":"T",
"allAc":8,
"allAn":1058,
"allAf":0.007561
},
"transcripts":[
{
"transcript":"YP_009724394.1",
"source":"RefSeq",
"bioType":"protein_coding",
"codons":"tCt/tTt",
"aminoAcids":"S/F",
"cdnaPos":"122",
"cdsPos":"122",
"exons":"1/1",
"proteinPos":"41",
"geneId":"43740572",
"hgnc":"ORF6",
"consequence":[
"missense_variant"
],
"hgvsc":"YP_009724394.1:c.122C>T",
"hgvsp":"YP_009724394.1:p.(Ser41Phe)",
"proteinId":"YP_009724394.1"
},
{
"transcript":"YP_009724395.1",
"source":"RefSeq",
"bioType":"protein_coding",
"geneId":"43740573",
"hgnc":"ORF7a",
"consequence":[
"upstream_gene_variant"
],
"proteinId":"YP_009724395.1"
}
]
}
]
}
- - - - \ No newline at end of file diff --git a/3.16/introduction/dependencies/index.html b/3.16/introduction/dependencies/index.html deleted file mode 100644 index f6c88afa..00000000 --- a/3.16/introduction/dependencies/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Dependencies | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Dependencies

All of the following dependencies have been included in this repository.

NameLicenseUsage
Amazon.LambdaApacheAWS extensions for .NET CLI
AWSSDKApacheAWS Lambda, S3, SNS support
Json.NETMITJASIX utility
libdeflateMITBlockCompression library
MoqBSDMocking framework for unit tests
NDesk.OptionsMIT/X11CommandLine library
xUnitApacheUnit testing framework
zlib-ngzlibBlockCompression library
zstdBSDBlockCompression library
- - - - \ No newline at end of file diff --git a/3.16/introduction/getting-started/index.html b/3.16/introduction/getting-started/index.html deleted file mode 100644 index 584bcc77..00000000 --- a/3.16/introduction/getting-started/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Getting Started | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Getting Started

Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.

tip

Nirvana currently uses .NET Core 3.1 or later. Please make sure that you have the most current runtime from the .NET Core downloads page.

Quick Start

If you want to get started right away, we've created a script that downloads Nirvana, compiles it, and starts annotating a test file:

curl -O https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh
bash ./TestNirvana.sh

We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X.

Getting Nirvana

Compile from Source

The following will grab the latest version of Nirvana from GitHub and compile it using the .NET Core compiler:

git clone https://github.com/Illumina/Nirvana.git
cd Nirvana
dotnet build -c Release

GitHub Release Notes

Alternatively, you can grab the latest binaries from our GitHub Releases page:

mkdir -p Nirvana/Data
cd Nirvana
unzip Nirvana-3.16.1-dotnet-3.1.0.zip

Docker

You can find us on Docker Hub under annotation/nirvana:

caution

We think Docker is fantastic. However, because our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Nirvana in Docker.

mkdir -p Nirvana/Data
cd Nirvana
docker pull annotation/nirvana:3.14

For Docker, we have special instructions for running the Downloader:

sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \
/opt/nirvana/Downloader.dll --ga GRCh37 -o /scratch

Similarly, we have special instructions for running Nirvana (Here's a toy VCF in case you need it):

sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \
/opt/nirvana/Nirvana.dll -c /scratch/Cache/GRCh37/Both \
-r /scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \
--sd /scratch/SupplementaryAnnotation/GRCh37 \
-i /scratch/HiSeq.10000.vcf.gz -o /scratch/HiSeq

Downloading the data files

To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:

dotnet bin/Release/netcoreapp3.1/Downloader.dll \
--ga GRCh37 \
-o Data
  • the --ga argument specifies the genome assembly which can be GRCh37, GRCh38, or both.
  • the -o argument specifies the output directory
Glitches in the Matrix

Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. If you see that a file was marked truncated, try fixing the root cause and running the downloader again.

tip

From time to time, you can re-run the Downloader to get the latest annotation files. It will only download the files that changed.

Download a test VCF file

Here's a toy VCF file you can play around with:

curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz

Running Nirvana

Once you have downloaded the data sets, use the following command to annotate your VCF:

dotnet bin/Release/netcoreapp3.1/Nirvana.dll \
-c Data/Cache/GRCh37/Both \
--sd Data/SupplementaryAnnotation/GRCh37 \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i HiSeq.10000.vcf.gz \
-o HiSeq.10000
  • the -c argument specifies the cache prefix
  • the --sd argument specifies the supplementary annotation directory
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input VCF path
  • the -o argument specifies the output filename prefix

When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:

---------------------------------------------------------------------------
Nirvana (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.16.1
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:01.2
SA Position Scan 00:00:00.1 55,270

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
chr1 00:00:00.1 00:00:01.5 6,323

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:01.3 23.9 %
Preload 00:00:00.1 2.9 %
Annotation 00:00:01.5 27.2 %

Peak memory usage: 1.434 GB
Time: 00:00:05.2

The output will be a JSON file called HiSeq.10000.json.gz. Here's the full JSON file.

- - - - \ No newline at end of file diff --git a/3.16/introduction/parsing-json/index.html b/3.16/introduction/parsing-json/index.html deleted file mode 100644 index 9283c847..00000000 --- a/3.16/introduction/parsing-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Parsing Nirvana JSON | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Parsing Nirvana JSON

Why JSON?

VCF is a fantastic file format that was developed during the methods development activities within the 1000 Genomes Project. Prior to that, variant callers were outputting information into a variety of tab-delimited formats. Some were based on existing standards (like GFF), while most were proprietary. The primary intent of VCF files was to provide a human-readable, standardized representation of genetic variants. Similar to SAM/BAM files, VCF files used BCF files as their binary counterpart.

In the very beginning, Nirvana offered VCF output for annotation. While many variant annotators offer an option to output VCF files, one could argue whether they are still human-readable. Here's an example from a VCF file produced by VEP v102:

chr3    107840527   .   A   ATTTTTTTTT,AT,ATTTTTTTT 153.51  PASS    AN=6;MQ=244.10;
SOR=1.739;QD=2.24;DP=57;AF=0.500,0.167,0.333;FS=0.000;AC=3,1,2;CSQ=TTTTTTTTT|
intron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|
Transcript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-132_622-124dup|||||||
rs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||
|||||||||0.792|-0.109757,T|intron_variant&non_coding_transcript_variant|MODIFIER|
LINC00635|ENSG00000241469|Transcript|ENST00000608506.6|lncRNA||4/4|
ENST00000608506.6:n.622-124dup|||||||rs35564779||-1||HGNC|HGNC:27184|||5|||||||||
Ensembl||||||||||||||||||||||||||||||||||||||||||||0.932|-0.075622,TTTTTTTT|
intron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|
Transcript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-131_622-124dup|||||||
rs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||
|||||||||0.808|-0.105490,TTTTTTTTT|intron_variant&non_coding_transcript_variant|
MODIFIER|LINC00636|ENSG00000240423|Transcript|ENST00000649048.1|lncRNA||2/3|
ENST00000649048.1:n.179+5223_179+5231dup|||||||rs35564779||1||HGNC|HGNC:27702|||||||||
|||Ensembl||||||||||||||||||||||||||||||||||||||||||||0.792|-0.109757, (etc.)

Originally Nirvana used the same VCF notation as VEP uses above. The problem is that you end up with a large amount of text that is difficult to parse out by eye and requires the use of several delimiters to divide the information into useful segments. When we originally annotated this variant using VEP, this single variant used 488,909 bytes (almost ½ MB). Surprisingly, we found that this broke some downstream tools that had preconceived notions of how long a single line could be in a VCF file.

caution

Whitespace is not allowed in the VCF INFO field. This means that if you wanted to express a gene description from OMIM: "HRAS PROTOONCOGENE, GTPase; HRAS", you would need to replace the spaces with something else like an underline. You would also need to hope that the VCF parser correctly handles embedded commas and semicolons in the description.

What do other annotators use?

Unfortunately, file format standardization has not made it all the way to variant annotation yet. The GA4GH Annotation group had many discussions on the topic several years ago. While a set of JSON schemas was created in that effort, there wasn't enough momentum to make this a new standard.

While there is some overlap in general file formats (JSON vs VCF vs TSV), none of those are compatible with each other. I.e. the VCF representation in VEP and snpEff is different just like the JSON schemas used by VEP, Nirvana, and GA4GH are different.

Source | Formats
VEP | JSON, TSV, VCF
snpEff | VCF
Annovar | TSV
Nirvana | JSON
GA4GH | JSON

We are interested in working together with others in the annotation space to develop a common annotation file format. Our belief is that this would accelerate methods development and benchmarking activities within annotation much in the same way the creation of SAM/BAM & VCF/BCF accelerated secondary analysis development.

What do we gain by using JSON?

  • JSON files are better at showing hierarchical and other relational data. For example when we output ClinVar data, we often want to output several overlapping RCV entries (variants coupled with a disease phenotype). In each, we would want to output a list of phenotypes, clinical significance, etc. That is difficult to accomplish in a human-readable way using VCF files (without resorting to a growing lexicon of delimiters).
  • JSON files use JavaScript data types, while VCF INFO fields don't directly have data types. Instead, external metadata located in the VCF header is required to indicate the preferred data type.
  • JSON files are more verbose. Often this is seen as a negative, but compression largely compensates for this. Given the following excerpt from the VCF example above, HGNC:27184|||5|||||||||Ensembl, it's not immediately obvious what the 5 refers to (without checking the VCF header for details). With JSON files, you would always see a key name associated with a value.
  • JSON files can be natively imported into different search and analytics solutions like Elasticsearch and Snowflake.
  • JSON strings do not have any limitations on the use of whitespace.

Parsing JSON

Our JSON files are organized similarly to original VCF variants:

Nirvana JSON files can get very large and sometimes we receive feedback that a bioinformatician tried to read the JSON file into Python or R resulting in a program that ran out of available RAM. This happens because those parsers try to load everything into memory all at once.

To get around those issues, we play some clever tricks with newlines that enable our users to parse our JSON files quickly and efficiently.

Organization

Our JSON file is arranged as follows:

  • the header section is located on the first line
  • each line after that corresponds to a position (same as a row in a VCF file)
    • until you reach the genes section ],"genes":[
  • each line after that corresponds to a gene
    • until you reach the end ]}

Knowing this, you can load each position line as an independent JSON object and extract the information you need.

Jupyter Notebook

To demonstrate this, we have put together a Jupyter notebook demonstrating how to do this in Python and an R version as well.

JASIX

One of the tools that we really like in the VCF ecosystem is tabix. Unfortunately, tabix only works for tab-delimited file formats. As a result, we created a similar tool for Nirvana JSON files called JASIX.

Here's an example of how you might use JASIX:

dotnet bin/Release/netcoreapp3.1/Jasix.dll -i dragen.json.gz -q chr1:942450-942455
  • the -i argument specifies the Nirvana JSON path
  • the -q argument specifies a genomic range (you can use as many of these as you want)

JASIX also includes additional options for showing the Nirvana header or for extracting different sections (like the genes section).

The output from JASIX is a compliant JSON object shown in pretty-printed form:

{"positions":[
{
"chromosome": "chr1",
"position": 942451,
"refAllele": "T",
"altAlleles": [
"C"
],
"quality": 484.23,
"filters": [
"PASS"
],
"cytogeneticBand": "1p36.33",
"samples": [
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 21,
"genotypeQuality": 60,
"alleleDepths": [
0,
21
]
},
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 32,
"genotypeQuality": 93,
"alleleDepths": [
0,
32
]
},
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 36,
"genotypeQuality": 105,
"alleleDepths": [
0,
36
]
}
],
"variants": [
{
"vid": "1-942451-T-C",
"chromosome": "chr1",
"begin": 942451,
"end": 942451,
"refAllele": "T",
"altAllele": "C",
"variantType": "SNV",
"hgvsg": "NC_000001.11:g.942451T>C",
"phylopScore": -0.1,
"clinvar": [
{
"id": "VCV000836156.1",
"reviewStatus": "criteria provided, single submitter",
"significance": [
"uncertain significance"
],
"refAllele": "T",
"altAllele": "T",
"lastUpdatedDate": "2020-08-20"
},
{
"id": "RCV001037211.1",
"variationId": 836156,
"reviewStatus": "criteria provided, single submitter",
"alleleOrigins": [
"germline"
],
"refAllele": "T",
"altAllele": "T",
"phenotypes": [
"not provided"
],
"medGenIds": [
"CN517202"
],
"significance": [
"uncertain significance"
],
"lastUpdatedDate": "2020-08-20",
"pubMedIds": [
"28492532"
]
}
],
"dbsnp": [
"rs6672356"
],
"gnomad": {
"coverage": 25,
"allAf": 0.999855,
"allAn": 123742,
"allAc": 123724,
"allHc": 61853,
"afrAf": 0.999416,
"afrAn": 10278,
"afrAc": 10272,
"afrHc": 5133,
"amrAf": 0.99995,
"amrAn": 20008,
"amrAc": 20007,
"amrHc": 10003,
"easAf": 1,
"easAn": 6054,
"easAc": 6054,
"easHc": 3027,
"finAf": 1,
"finAn": 8696,
"finAc": 8696,
"finHc": 4348,
"nfeAf": 0.999899,
"nfeAn": 49590,
"nfeAc": 49585,
"nfeHc": 24790,
"asjAf": 1,
"asjAn": 7208,
"asjAc": 7208,
"asjHc": 3604,
"sasAf": 0.99967,
"sasAn": 18160,
"sasAc": 18154,
"sasHc": 9074,
"othAf": 1,
"othAn": 3748,
"othAc": 3748,
"othHc": 1874,
"maleAf": 0.9999,
"maleAn": 69780,
"maleAc": 69773,
"maleHc": 34883,
"femaleAf": 0.999796,
"femaleAn": 53962,
"femaleAc": 53951,
"femaleHc": 26970,
"controlsAllAf": 0.999815,
"controlsAllAn": 48654,
"controlsAllAc": 48645
},
"oneKg": {
"allAf": 1,
"afrAf": 1,
"amrAf": 1,
"easAf": 1,
"eurAf": 1,
"sasAf": 1,
"allAn": 5008,
"afrAn": 1322,
"amrAn": 694,
"easAn": 1008,
"eurAn": 1006,
"sasAn": 978,
"allAc": 5008,
"afrAc": 1322,
"amrAc": 694,
"easAc": 1008,
"eurAc": 1006,
"sasAc": 978
},
"primateAI": [
{
"hgnc": "SAMD11",
"scorePercentile": 0.87
}
],
"revel": {
"score": 0.145
},
"topmed": {
"allAf": 0.999809,
"allAn": 125568,
"allAc": 125544,
"allHc": 62760
},
"transcripts": [
{
"transcript": "ENST00000420190.6",
"source": "Ensembl",
"bioType": "protein_coding",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"downstream_gene_variant"
],
"proteinId": "ENSP00000411579.2"
},
{
"transcript": "ENST00000342066.7",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "1110",
"cdsPos": "1027",
"exons": "10/14",
"proteinPos": "343",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000342066.7:c.1027T>C",
"hgvsp": "ENSP00000342313.3:p.(Trp343Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000342313.3",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000618181.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "732",
"cdsPos": "652",
"exons": "7/11",
"proteinPos": "218",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618181.4:c.652T>C",
"hgvsp": "ENSP00000480870.1:p.(Trp218Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000480870.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000622503.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "1110",
"cdsPos": "1030",
"exons": "10/14",
"proteinPos": "344",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000622503.4:c.1030T>C",
"hgvsp": "ENSP00000482138.1:p.(Trp344Arg)",
"isCanonical": true,
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000482138.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000618323.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "cTg/cCg",
"aminoAcids": "L/P",
"cdnaPos": "712",
"cdsPos": "632",
"exons": "8/12",
"proteinPos": "211",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618323.4:c.632T>C",
"hgvsp": "ENSP00000480678.1:p.(Leu211Pro)",
"polyPhenScore": 0,
"polyPhenPrediction": "unknown",
"proteinId": "ENSP00000480678.1",
"siftScore": 0.03,
"siftPrediction": "deleterious - low confidence"
},
{
"transcript": "ENST00000616016.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "ccT/ccC",
"aminoAcids": "P",
"cdnaPos": "944",
"cdsPos": "864",
"exons": "9/13",
"proteinPos": "288",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"synonymous_variant"
],
"hgvsc": "ENST00000616016.4:c.864T>C",
"hgvsp": "ENST00000616016.4:c.864T>C(p.(Pro288=))",
"proteinId": "ENSP00000478421.1"
},
{
"transcript": "ENST00000618779.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "921",
"cdsPos": "841",
"exons": "9/13",
"proteinPos": "281",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618779.4:c.841T>C",
"hgvsp": "ENSP00000484256.1:p.(Trp281Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000484256.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000616125.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "783",
"cdsPos": "703",
"exons": "8/12",
"proteinPos": "235",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000616125.4:c.703T>C",
"hgvsp": "ENSP00000484643.1:p.(Trp235Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000484643.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000620200.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "cTg/cCg",
"aminoAcids": "L/P",
"cdnaPos": "427",
"cdsPos": "347",
"exons": "5/9",
"proteinPos": "116",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000620200.4:c.347T>C",
"hgvsp": "ENSP00000484820.1:p.(Leu116Pro)",
"polyPhenScore": 0,
"polyPhenPrediction": "unknown",
"proteinId": "ENSP00000484820.1",
"siftScore": 0.16,
"siftPrediction": "tolerated - low confidence"
},
{
"transcript": "ENST00000617307.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "867",
"cdsPos": "787",
"exons": "9/13",
"proteinPos": "263",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000617307.4:c.787T>C",
"hgvsp": "ENSP00000482090.1:p.(Trp263Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000482090.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "NM_152486.2",
"source": "RefSeq",
"bioType": "protein_coding",
"codons": "Cgg/Cgg",
"aminoAcids": "R",
"cdnaPos": "1107",
"cdsPos": "1027",
"exons": "10/14",
"proteinPos": "343",
"geneId": "148398",
"hgnc": "SAMD11",
"consequence": [
"synonymous_variant"
],
"hgvsc": "NM_152486.2:c.1027T>C",
"hgvsp": "NM_152486.2:c.1027T>C(p.(Arg343=))",
"isCanonical": true,
"proteinId": "NP_689699.2"
},
{
"transcript": "ENST00000341065.8",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "750",
"cdsPos": "751",
"exons": "8/12",
"proteinPos": "251",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000341065.8:c.750T>C",
"hgvsp": "ENSP00000349216.4:p.(Trp251Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000349216.4",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000455979.1",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "507",
"cdsPos": "508",
"exons": "4/7",
"proteinPos": "170",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000455979.1:c.507T>C",
"hgvsp": "ENSP00000412228.1:p.(Trp170Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000412228.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000478729.1",
"source": "Ensembl",
"bioType": "processed_transcript",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000474461.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "389",
"exons": "3/4",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000474461.1:n.389T>C"
},
{
"transcript": "ENST00000466827.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "191",
"exons": "2/2",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000466827.1:n.191T>C"
},
{
"transcript": "ENST00000464948.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "286",
"exons": "1/2",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000464948.1:n.286T>C"
},
{
"transcript": "NM_015658.3",
"source": "RefSeq",
"bioType": "protein_coding",
"geneId": "26155",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
],
"isCanonical": true,
"proteinId": "NP_056473.2"
},
{
"transcript": "ENST00000483767.5",
"source": "Ensembl",
"bioType": "retained_intron",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000327044.6",
"source": "Ensembl",
"bioType": "protein_coding",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
],
"isCanonical": true,
"proteinId": "ENSP00000317992.6"
},
{
"transcript": "ENST00000477976.5",
"source": "Ensembl",
"bioType": "retained_intron",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000496938.1",
"source": "Ensembl",
"bioType": "processed_transcript",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
}
]
}
]
}
]}
- - - - \ No newline at end of file diff --git a/3.16/utilities/jasix/index.html b/3.16/utilities/jasix/index.html deleted file mode 100644 index b3dbcf3a..00000000 --- a/3.16/utilities/jasix/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Jasix | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.16

Jasix

Overview

The Jasix index is aimed at providing TABIX-like indexing capabilities for the Nirvana JSON output.

Creating the Jasix index

The Jasix index (which comes in a .jsi file) is generated on-the-fly with Nirvana output. It can also be generated independently by running the Jasix command line utility on the JSON output file. Please note that the Jasix utility can only consume JSON files that follow the Nirvana JSON output format. The following code blocks demonstrate the help menu and index generating functionalities of Jasix.

Example

dotnet Jasix.dll -h
USAGE: dotnet Jasix.dll -i in.json.gz [options]
Indexes a Nirvana annotated JSON file

OPTIONS:
--header, -t print also the header lines
--only-header, -H print only the header lines
--chromosomes, -l list chromosome names
--index, -c create index
--in, -i <VALUE> input
--out, -o <VALUE> compressed output file name (default:console)
--query, -q <VALUE> query range
--section, -s <VALUE> complete section (positions or genes) to output
--help, -h displays the help menu
--version, -v displays the version
dotnet Jasix.dll --index -i input.json.gz
---------------------------------------------------------------------------
Jasix (c) 2017 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 2.0.0
---------------------------------------------------------------------------

Ref Sequence chrM indexed in 00:00:00.2
Ref Sequence chr1 indexed in 00:00:05.8
Ref Sequence chr2 indexed in 00:00:06.0
.
.
.
Peak memory usage: 28.5 MB
Time: 00:01:14.8

Querying the index

The Jasix query format is chr:start-end. If end is not provided, it is assumed that end=start. If only chr is provided, all entries for that chromosome will be returned.

dotnet Jasix.dll -i input.json.gz -q chrM:5000-7000
{
"positions":[
{
"chromosome":"chrM",
"refAllele":"C",
"position":5581,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"T"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1625,
"genotypeQuality":1,
"alleleDepths":[
0,
1625
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"T",
"refAllele":"C",
"begin":5581,
"chromosome":"chrM",
"end":5581,
"variantType":"SNV",
"vid":"MT:5581:T"
}
]
},
{
"chromosome":"chrM",
"refAllele":"A",
"position":6267,
"quality":1637.00,
"filters":[
"LowGQXHetSNP"
],
"altAlleles":[
"G"
],
"samples":[
{
"variantFreq":0.6873,
"totalDepth":323,
"genotypeQuality":1,
"alleleDepths":[
101,
222
],
"genotype":"0/1"
}
],
"variants":[
{
"altAllele":"G",
"refAllele":"A",
"begin":6267,
"chromosome":"chrM",
"end":6267,
"variantType":"SNV",
"vid":"MT:6267:G"
}
]
}
]
}

The default output stream is Console. However, if an output filename is provided, Jasix outputs the results to that file in a bgzip-compressed format. The output is always a valid JSON entry. If requested (via the -t option), the header of the indexed file will be provided. Multiple queries can be submitted in the same command and the output will contain them within the same "positions" block in order of the submitted queries (Warning: if the queries are out of order, or overlapping, the output will be out of order and intersecting).

dotnet Jasix.dll -i input.json.gz  -q chrM:5000-7000 -q chrM:8500-9500 -t
{
"header":{
"annotator":"Illumina Annotation Engine 1.6.2.0",
"creationTime":"2017-08-30 11:42:57",
"genomeAssembly":"GRCh37",
"schemaVersion":6,
"dataVersion":"84.24.39",
"dataSources":[
{
"name":"VEP",
"version":"84",
"description":"Ensembl",
"releaseDate":"2017-01-16"
}
],
"samples":[
"Mother"
]
},
"positions":[
{
"chromosome":"chrM",
"refAllele":"C",
"position":5581,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"T"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1625,
"genotypeQuality":1,
"alleleDepths":[
0,
1625
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"T",
"refAllele":"C",
"begin":5581,
"chromosome":"chrM",
"end":5581,
"variantType":"SNV",
"vid":"MT:5581:T"
}
]
},
{
"chromosome":"chrM",
"refAllele":"A",
"position":6267,
"quality":1637.00,
"filters":[
"LowGQXHetSNP"
],
"altAlleles":[
"G"
],
"samples":[
{
"variantFreq":0.6873,
"totalDepth":323,
"genotypeQuality":1,
"alleleDepths":[
101,
222
],
"genotype":"0/1"
}
],
"variants":[
{
"altAllele":"G",
"refAllele":"A",
"begin":6267,
"chromosome":"chrM",
"end":6267,
"variantType":"SNV",
"vid":"MT:6267:G"
}
]
},
{
"chromosome":"chrM",
"refAllele":"G",
"position":8702,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"A"
],
"samples":[
{
"variantFreq":0.9987,
"totalDepth":1534,
"genotypeQuality":1,
"alleleDepths":[
2,
1532
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"A",
"refAllele":"G",
"begin":8702,
"chromosome":"chrM",
"end":8702,
"variantType":"SNV",
"vid":"MT:8702:A"
}
]
},
{
"chromosome":"chrM",
"refAllele":"G",
"position":9378,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"A"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1018,
"genotypeQuality":1,
"alleleDepths":[
0,
1018
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"A",
"refAllele":"G",
"begin":9378,
"chromosome":"chrM",
"end":9378,
"variantType":"SNV",
"vid":"MT:9378:A"
}
]
}
]
}

Extracting a section

The Nirvana JSON file has three sections: header, positions and genes. Header can be printed using the -H option. If you are interested in only the positions or genes section, you can use the -s or --section option.

dotnet Jasix.dll -i input.json.gz  -s genes
[
{
"name": "ABCB10",
"omim": [
{
"mimNumber": 605454,
"geneName": "ATP-binding cassette, subfamily B, member 10"
}
]
},
{
"name": "ABCD3",
"omim": [
{
"mimNumber": 170995,
"geneName": "ATP-binding cassette, subfamily D, member 3 (peroxisomal membrane protein 1, 70kD)",
"description": "The ABCD3 gene encodes a peroxisomal membrane transporter involved in the transport of branched-chain fatty acids and C27 bile acids into the peroxisome; the latter function is a crucial step in bile acid biosynthesis (summary by Ferdinandusse et al., 2015).",
"phenotypes": [
{
"mimNumber": 616278,
"phenotype": "?Bile acid synthesis defect, congenital, 5",
"mapping": "molecular basis of the disorder is known",
"inheritances": [
"Autosomal recessive"
],
"comments": [
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
}
]
- - - - \ No newline at end of file diff --git a/3.17/core-functionality/canonical-transcripts/index.html b/3.17/core-functionality/canonical-transcripts/index.html deleted file mode 100644 index 9a13f51f..00000000 --- a/3.17/core-functionality/canonical-transcripts/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Canonical Transcripts | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Canonical Transcripts

Overview

One of the more polarizing topics within annotation is the notion of canonical transcripts. Because of alternative splicing, we often have several transcripts for each gene. In the human genome, there are an average of 3.4 transcripts per gene (Tung, 2020). As scientists, we seem to have a need for identifying a representative example of a gene - even if there's no biological basis for the motivation.

Golden Helix Blog

A few years ago, the guys over at Golden Helix wrote an excellent post about the pitfalls and issues surrounding the identification of canonical transcripts: What’s in a Name: The Intricacies of Identifying Variants.

In Nirvana, we wanted to identify an algorithm for determining the canonical transcript and apply it consistently to all of our transcript data sources.

Known Algorithms

UCSC

UCSC publishes a list of canonical transcripts in its knownCanonical table which is available via the TableBrowser. Of the RefSeq data sources, it was the only one we could find that provided canonical transcripts:

The canonical transcript is defined as either the longest CDS, if the gene has translated transcripts, or the longest cDNA.

If you were to implement this and compare it with the knownCanonical table, you would see a lot of exceptions to the rule.

Ensembl

The Ensembl glossary states:

The canonical transcript is used in the gene tree analysis in Ensembl and does not necessarily reflect the most biologically relevant transcript of a gene. For human, the canonical transcript for a gene is set according to the following hierarchy:

  1. Longest CCDS translation with no stop codons.
  2. If no (1), choose the longest Ensembl/Havana merged translation with no stop codons.
  3. If no (2), choose the longest translation with no stop codons.
  4. If no translation, choose the longest non-protein-coding transcript.

ACMG

From the ACMG Guidelines for the Interpretation of Sequence Variants:

A reference transcript for each gene should be used and provided in the report when describing coding variants. The transcript should represent either the longest known transcript and/or the most clinically relevant transcript.

ClinVar

From the ClinVar paper:

When there are multiple transcripts for a gene, ClinVar selects one HGVS expression to construct a preferred name. By default, this selection is based on the first reference standard transcript identified by the RefSeqGene/LRG (Locus Reference Genomic) collaboration.

Unified Approach

Our approach is almost identical to the one Golden Helix discussed in their article:

  1. If we're looking at RefSeq, only consider NM & NR transcripts as candidates for canonical transcripts.
  2. Sort the transcripts in the following order:
    1. Locus Reference Genomic (LRG) entries occur before non-LRG entries
    2. Descending CDS length
    3. Descending transcript length
    4. Ascending accession number
  3. Grab the first entry
- - - - \ No newline at end of file diff --git a/3.17/core-functionality/gene-fusions/index.html b/3.17/core-functionality/gene-fusions/index.html deleted file mode 100644 index c12ccacc..00000000 --- a/3.17/core-functionality/gene-fusions/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Gene Fusion Detection | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Gene Fusion Detection

Overview

Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed.

Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana.

The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:

Publication

Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. Landscape of gene fusions in epithelial cancers: seq and ye shall find. Genome Med 7, 129 (2015)

Approach

Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. Let's consider two transcripts, NM_014206.3 (TMEM258) and NM_013402.4 (FADS1). Both of these genes are on the reverse strand in the genome. The vertical bar indicates the breakpoint where these transcripts are fused:

TMEM258 &amp; FADS1 transcripts

The above explains where the transcripts are fused together, but it doesn't explain in which orientation. By using the directionality encoded in the translocation breakend, we can rearrange these two transcripts in four ways:

TMEM258 &amp; FADS1 gene fusions

Only two of the combinations yield a fusion that contains both the transcription start site (TSS) and the stop codon. In one case, we can even detect an in-frame gene fusion.

Interpreting translocation breakends

At first glance, translocation breakends are a bit daunting. However, once you understand how they work, they're actually quite simple. For more information, we recommend reading section 5.4 in the VCF 4.2 specification.

REFALTMeaning
st[p[piece extending to the right of p is joined after t
st]p]reverse comp piece extending left of p is joined after t
s]p]tpiece extending to the left of p is joined before t
s[p[treverse comp piece extending right of p is joined before t

Variant Types

Specifically we can identify gene fusions from the following structural variant types:

  • deletions (<DEL>)
  • tandem_duplications (<DUP:TANDEM>)
  • inversions (<INV>)
  • translocation breakpoints (AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[)

Criteria

The following criteria must be met for Nirvana to identify a gene fusion:

  1. After accounting for gene orientation and genomic rearrangements, both transcripts must have the same orientation
  2. Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)
  3. Both transcripts must belong to different genes
  4. Both transcripts cannot have a coding region that already overlaps without the variant (i.e. in cases where two genes naturally overlap, we don't want to call a gene fusion)

ETV6/RUNX1 Example

ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). Patients with this translocation are associated with a good prognosis and excellent response to treatment.

VCF

Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
chr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND
chr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND
chr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND
chr21 36420865 . C [chr12:12026270[C . PASS SVTYPE=BND

When you put these calls together, the resulting genomic rearrangement looks something like this:

JSON Output

The annotation for the first variant in the VCF looks like this:

{
"chromosome": "chr12",
"position": 12026270,
"refAllele": "C",
"altAlleles": [
"[chr21:36420865[C"
],
"filters": [
"PASS"
],
"cytogeneticBand": "12p13.2",
"clingen": [
{
"chromosome": "12",
"begin": 173786,
"end": 34835837,
"variantType": "copy_number_gain",
"id": "nsv995956",
"clinicalInterpretation": "pathogenic",
"phenotypes": [
"Decreased calvarial ossification",
"Delayed gross motor development",
"Feeding difficulties",
"Frontal bossing",
"Morphological abnormality of the central nervous system",
"Patchy alopecia"
],
"phenotypeIds": [
"HP:0002007",
"HP:0002011",
"HP:0002194",
"HP:0002232",
"HP:0005474",
"HP:0011968",
"MedGen:C0232466",
"MedGen:C1862862",
"MedGen:CN001816",
"MedGen:CN001820",
"MedGen:CN001989",
"MedGen:CN004852"
],
"observedGains": 1,
"validated": true
}
],
"variants": [
{
"vid": "12-12026270-C-[chr21:36420865[C",
"chromosome": "chr12",
"begin": 12026270,
"end": 12026270,
"isStructuralVariant": true,
"refAllele": "C",
"altAllele": "[chr21:36420865[C",
"variantType": "translocation_breakend",
"cosmicGeneFusions": [
{
"id": "COSF2245",
"numSamples": 249,
"geneSymbols": [
"ETV6",
"RUNX1"
],
"hgvsr": "ENST00000396373.4(ETV6):r.1_1283::ENST00000300305.3(RUNX1):r.504_6222",
"histologies": [
{
"name": "acute lymphoblastic B cell leukaemia",
"numSamples": 169
},
{
"name": "acute lymphoblastic leukaemia",
"numSamples": 80
}
],
"sites": [
{
"name": "haematopoietic and lymphoid tissue",
"numSamples": 249
}
],
"pubMedIds": [
7761424,
7780150,
8609706,
8751464,
8982044,
9067587,
9207408,
9226156,
9628428,
10463610,
10774753,
11091202,
12621238,
12661004,
12750722,
15104290,
15642392,
24557455,
26925663
]
}
],
"fusionCatcher": [
{
"genes": {
"first": {
"hgnc": "ETV6",
"isOncogene": true
},
"second": {
"hgnc": "RUNX1",
"isOncogene": true
}
},
"somaticSources": [
"DepMap CCLE",
"Cancer Genome Project",
"ChimerKB 4.0",
"ChimerPub 4.0",
"ChimerSeq 4.0",
"Known",
"Mitelman DB",
"OncoKB",
"TICdb"
]
}
],
"transcripts": [
{
"transcript": "ENST00000396373.4",
"source": "Ensembl",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "ENSG00000139083",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusions": [
{
"transcript": "ENST00000437180.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000437180.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000300305.3",
"bioType": "protein_coding",
"intron": 1,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000300305.3(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000482318.1",
"bioType": "nonsense_mediated_decay",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000482318.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000486278.2",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000486278.2(RUNX1):r.?_-15+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000455571.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000455571.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000475045.2",
"bioType": "protein_coding",
"intron": 11,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000475045.2(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000416754.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000416754.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
}
],
"isCanonical": true,
"proteinId": "ENSP00000379658.3"
},
{
"transcript": "NM_001987.4",
"source": "RefSeq",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "2120",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusions": [
{
"transcript": "NM_001754.4",
"bioType": "protein_coding",
"intron": 2,
"geneId": "861",
"hgnc": "RUNX1",
"hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"
}
],
"isCanonical": true,
"proteinId": "NP_001978.1"
}
]
}
]
}
FieldTypeNotes
transcriptstringtranscript ID
bioTypestringdescriptions of the biotypes from Ensembl
exonintexon that contained fusion breakpoint
intronintintron that contained fusion breakpoint
geneIdstringgene ID. e.g. ENSG00000116062
hgncstringgene symbol. e.g. MSH6
hgvsrstringHGVS RNA nomenclature

Gene Fusion Data Sources

To provide more context to our gene fusions, we provide the following gene fusion data sources:

Consequences

When a gene fusion is identified, we add the following Sequence Ontology consequence:

              "consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],

Gene Fusions Section

The geneFusions section is contained within the object of the originating transcript. It will contain all the pairwise gene fusions that obey the criteria outlined above. In the case of ENST00000396373.4, there are 7 other Ensembl transcripts that would produce a gene fusion. For NM_001987.4, there is only one transcript (NM_001754.4) that would produce a gene fusion.

For each originating transcript, we report the following for each partner transcript:

  • transcript ID
  • gene ID
  • HGNC gene symbol
  • transcript bio type (e.g. protein_coding)
  • intron or exon number containing the breakpoint
  • HGVS RNA notation
tip

Before Nirvana 3.15, we provided HGVS coding notation. However, HGVS r. notation is more appropriate for these types of fusion splicing events (see HGVS SVD-WG007).

          "geneFusions": [
{
"transcript": "NM_001754.4",
"bioType": "protein_coding",
"intron": 2,
"geneId": "861",
"hgnc": "RUNX1",
"hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"
}
],

The HGVS RNA notation above indicates that the gene fusion starts with NM_001754.4 (RUNX1) until CDS position 58 and continues with NM_001987.4 (ETV6). 1009+3367 indicates that the fusion occurred 3367 bp within intron 2.

- - - - \ No newline at end of file diff --git a/3.17/core-functionality/mnv-recomposition/index.html b/3.17/core-functionality/mnv-recomposition/index.html deleted file mode 100644 index fab80ff5..00000000 --- a/3.17/core-functionality/mnv-recomposition/index.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - -MNV Recomposition | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

MNV Recomposition

Overview

Most annotation tools handle variants independently. The problem with this approach is that nearby variants could affect the same codon leading to a very different annotation. For example, consider the following example (Danecek, 2017):

When handled independently, the two variants (C→T & G→A) would each be annotated as missense variants. However, if we consider them together, the resulting MNV would yield a stop gain.

By default, Nirvana identifies these types of cases where two or more SNVs would affect the same codon. In addition, it's able to perform this operation on VCFs containing large numbers of samples (we've tested this on 2,500+ samples using the 1000 Genomes Project VCF files).

Publication

Petr Danecek, Shane A McCarthy, BCFtools/csq: haplotype-aware variant consequences, Bioinformatics, Volume 33, Issue 13, 1 July 2017, Pages 2037–2039

Supported variant types

At the moment, Nirvana only supports recomposing multiple SNVs into an MNV. The Danecek paper makes a compelling case for supporting frameshifting variants paired with frame-restoring variants. We've also received requests for supporting the recomposition of an SNV with insertions and deletions. While this is something we've looked into, it represents functionality that many of our clinical customers are not yet comfortable with.

Criteria

Nirvana will recompose a set of SNVs if two or more SNVs are located in the same codon for any codon in any of the overlapping transcripts.

The following criteria must also be met for at least one sample:

  1. Genotypes are provided for the VCF variants and all variants are in phase or homozygous variant.
  2. All the available phase set IDs are the same (homozygous variants are available to all phase sets)
  3. The genotype ploidy for all the variants is the same.
  4. No unsupported variant type (i.e. insertion or deletion) overlaps the recomposed variants
  5. The first and last base in at least one of the recomposed alleles must be non-reference.

Examples

During variant recomposition, if two SNVs affect the same codon, it becomes the seed codon. If there are SNVs in the adjacent codons, they will be aggregated into the seed codon.

  • Three SNVs in two adjacent codons. The recomposed alternate allele is ATAG: -

  • Three SNVs in two adjacent codons (larger distance). The recomposed alternate allele is ATATCC: -

  • Nirvana can use multiple reading frames to aggregate the seed codon. In this example, the seed codon is highlighted in green. If we look at reading frame 1, we see that the T→A variant occurs in the ACT codon. The adjacent codon to the left also has a variant C→T. As a result, there can be up to four bases between SNVs when aggregating the flanking codons. The recomposed alternate allele is TTCACATAGCACTCAC: -

  • Nothing will be recomposed if there's no seed codon: -

Multiple Samples

Recomposing variants while handling multiple samples can be complex. The recomposition criteria described above often lead to sample-specific recomposed variants. Here we show the recomposition of three variants with sample-specific criteria marked in bold:

POSREFALTSample 1Sample 2Sample 3
Decomposed Variant 1100AC0|10|11|1
Decomposed Variant 2101CG0/11|10|0
Decomposed Variant 3102TA1|1.0|1
Recomposed Variant 1100ACAG, CG.1|2.
Recomposed Variant 2100ACTCCT, CCA..1|2

In the example above, the unphased heterozygous genotype (0/1) in sample 1 at position 101 would prevent the MNVs from being recomposed. Similarly, the unknown genotype for sample 2 at position 102 would produce a smaller MNV than the one expressed for sample 3.

Phase Sets

Homozygous variants, same phase set

Recomposed phase set becomes . since homozygous variants belong to all phase sets.

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT1|1567
Decomposed Variant 2101CG1|1567
Recomposed Variant100ACTG1|1.

Mixing phased and unphased variants

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT0|1567
Decomposed Variant 2101CG1/1.
Recomposed Variant100ACAG,TG1|2567

Variants in different phase sets

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT0|1567
Decomposed Variant 2101CG1|1890
Recomposed Variant100ACAG,TG1|2.

Unphased homozygous variants

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT1/1.
Decomposed Variant 2101CG1/1.
Recomposed Variant100ACTG1/1.

Homozygous variants are not commutative

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT0|1567
Decomposed Variant 2101CG1|1567
Decomposed Variant 3102GT0|1890

In this example, the homozygous variant at position 101 cannot bridge the gap between the other two variants since there could be a switching error between phase sets 567 & 890. As a result, we have to create two overlapping MNVs:

POSREFALTGenotypePhase Set
Recomposed Variant 1100ACAG, TG1|2567
Recomposed Variant 2101CGGG, GT1|2890

Conflicting Genotypes

JSON Output

Given the following VCF entries:

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  S1  S2  S3
chr1 12861477 . T C . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477
chr1 12861478 . G A . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477

Each original variant would be annotated as usual. The difference is that both will now have an isDecomposedVariant flag set to true in addition to an entry in the linkedVids field that points to the new MNV:

{
"chromosome":"chr1",
"position":12861477,
"refAllele":"T",
"altAlleles":[
"C"
],
"filters":[
"PASS"
],
"samples":[
{
"genotype":"0/0"
},
{
"genotype":"0/0"
},
{
"genotype":"0|1"
}
],
"variants":[
{
"vid":"1-12861477-T-C",
"chromosome":"chr1",
"begin":12861477,
"end":12861477,
"refAllele":"T",
"altAllele":"C",
"variantType":"SNV",
"isDecomposedVariant":true,
"linkedVids":[
"1-12861477-TG-CA"
],
"hgvsg":"NC_000001.11:g.12861477T>C",
"transcripts":[ ... ]
}
]
},
{
"chromosome":"chr1",
"position":12861478,
"refAllele":"G",
"altAlleles":[
"A"
],
"filters":[
"PASS"
],
"samples":[
{
"genotype":"0/0"
},
{
"genotype":"0/0"
},
{
"genotype":"0|1"
}
],
"variants":[
{
"vid":"1-12861478-G-A",
"chromosome":"chr1",
"begin":12861478,
"end":12861478,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"isDecomposedVariant":true,
"linkedVids":[
"1-12861477-TG-CA"
],
"hgvsg":"NC_000001.11:g.12861478G>A",
"transcripts":[ ... ]
}
]
}

The recomposed variant gets a separate entry where the isRecomposedVariant flag is set to true and the linkedVids field links to the constituent SNVs:

    {
"chromosome": "chr1",
"position": 12861477,
"refAllele": "TG",
"altAlleles": [
"CA"
],
"filters": [
"PASS"
],
"cytogeneticBand": "1p36.21",
"samples": [
{
"genotype": "0|0"
},
{
"genotype": "0|0"
},
{
"genotype": "0|1"
}
],
"variants": [
{
"vid": "1-12861477-TG-CA",
"chromosome": "chr1",
"begin": 12861477,
"end": 12861478,
"refAllele": "TG",
"altAllele": "CA",
"variantType": "MNV",
"isRecomposedVariant": true,
"linkedVids": [
"1-12861477-T-C",
"1-12861478-G-A"
],
"hgvsg": "NC_000001.11:g.12861477_12861478inv",
"transcripts":[ ... ]
}
]
},
Recomposed QUAL, FILTER, and GQ

Although the example above does not demonstrate it, Nirvana tries to set the quality score, filter, and genotype quality (GQ) for the recomposed variant. The QUAL score is calculated to be the minimum QUAL score for all the constituent SNVs. The same method is used for the genotype quality (GQ) scores. For the filters field, PASS will be used if all constituent variants passed their filters, otherwise we set it to FilteredVariantsRecomposed.

- - - - \ No newline at end of file diff --git a/3.17/core-functionality/variant-ids/index.html b/3.17/core-functionality/variant-ids/index.html deleted file mode 100644 index 4e477ce0..00000000 --- a/3.17/core-functionality/variant-ids/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Variant IDs | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Variant IDs

Overview

Many downstream tools use a variant identifier to store annotation results. We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute.

The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. Our VID scheme attempts to fill that gap.

Conventions
  • all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)
  • for a reference variant (i.e. no alt allele), replace the period (.) with the reference base
  • padding bases are used, so neither the reference nor the alternate allele can be empty
  • some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base

Small Variants

VCF Examples

chr1    66507   .   T   A   184.45  PASS    .
chr1 66521 . T TATATA 144.53 PASS .
chr1 66572 . GTA G,GTACTATATATTATA 45.45 PASS .

Format

chromosomepositionreference allelealternate allele

VID Examples

  • 1-66507-T-A
  • 1-66521-T-TATATA
  • 1-66572-GTA-G
  • 1-66572-G-GTACTATATATTA

Translocation Breakends

VCF Example

chr1    2617277 .   A   AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[  .   PASS    SVTYPE=BND

Format

chromosomepositionreference allelealternate allele

VID Example

  • 1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[

All Other Structural Variants

VCF Examples

chr1    1000    .   G   <ROH>   .   PASS    END=3001000;SVTYPE=ROH
chr1 1350082 . G <DEL> . PASS END=1351320;SVTYPE=DEL
chr1 1477854 . C <DUP:TANDEM> . PASS END=1477984;SVTYPE=DUP
chr1 1477968 . T <INS> . PASS END=1477968;SVTYPE=INS
chr1 1715898 . N <DUP> . PASS SVTYPE=CNV;END=1750149
chr1 2650426 . N <DEL> . PASS SVTYPE=CNV;END=2653074
chr2 321682 . T <INV> . PASS SVTYPE=INV;END=421681
chr20 2633403 . G <STR2> . PASS END=2633421

Format

chromosomepositionend positionreference allelealternate alleleSVTYPE

VID Examples

  • 1-1000-3001000-G-<ROH>-ROH
  • 1-1350082-1351320-G-<DEL>-DEL
  • 1-1477854-1477984-C-<DUP:TANDEM>-DUP
  • 1-1477968-1477968-T-<INS>-INS
  • 1-1715898-1750149-A-<DUP>-CNV (replace the N with A)
  • 1-2650426-2653074-N-<DEL>-CNV (keep the N)
  • 2-321682-421681-T-<INV>-INV
  • 20-2633403-2633421-G-<STR2>-STR
- - - - \ No newline at end of file diff --git a/3.17/data-sources/1000Genomes-snv-json/index.html b/3.17/data-sources/1000Genomes-snv-json/index.html deleted file mode 100644 index a738abd7..00000000 --- a/3.17/data-sources/1000Genomes-snv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-snv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

1000Genomes-snv-json

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.
- - - - \ No newline at end of file diff --git a/3.17/data-sources/1000Genomes-sv-json/index.html b/3.17/data-sources/1000Genomes-sv-json/index.html deleted file mode 100644 index ed833d21..00000000 --- a/3.17/data-sources/1000Genomes-sv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-sv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

1000Genomes-sv-json

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.
- - - - \ No newline at end of file diff --git a/3.17/data-sources/1000Genomes/index.html b/3.17/data-sources/1000Genomes/index.html deleted file mode 100644 index ce75af4f..00000000 --- a/3.17/data-sources/1000Genomes/index.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - -1000 Genomes | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

1000 Genomes

Overview

The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases.

Publication

Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. Nature 526, 75–81 (2015). https://doi.org/10.1038/nature15394

Populations

Small Variants

VCF File Parsing

The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. Our team converted the original data to VCF entries with allele counts and allele numbers like the following.

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633

The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored).

We parse the VCF file and extract the following fields from INFO:

  • AA
  • AC
  • AN
  • EAS_AN
  • AMR_AN
  • AFR_AN
  • EUR_AN
  • SAS_AN
  • EAS_AC
  • AMR_AC
  • AFR_AC
  • EUR_AC
  • SAS_AC

Conflict Resolution

We have observed conflicting allele frequency information in the source. Take the following example:

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;
1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;

That is, the variant 1-20505705-C-CTG has conflicting entries. To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX.

Chromosome# of alleles# of conflicting allelespercentage
chrX83480027330.33%
Total2141309827430.013%

Currently, we remove the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep the allele frequencies of all other alleles in the VCF line.

Potential Alternate Solutions

  • Remove all alleles that are contained in the VCF lines which have a conflicting allele. (Recommended by the 1000 Genomes group, Holly Zheng-Bradley, 7/29/2015)
  • Recalculate the allele frequency for the conflicting allele.
  • Pick the allele frequency that has the highest data support.

Download URL

GRCh37 -GRCh38

JSON Output

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.

Structural Variants

VCF File Parsing

The VCF files contain entries like the following:

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A <CN0>,<CN2>,<CN3>,<CN4> 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4

Please note that CNVs are allele-specific. For example, HG00096 is effectively copy number 4, which would be a net gain on chr22.

1000 Genomes contains 5 types of structural variants:

  • CNV
  • DEL
  • DUP
  • INS
  • INV

Since the 1000 Genomes data is provided in VCF format, we assume that the coordinates follow the VCF convention, i.e., there is a padding base for symbolic alleles. So all the intervals can be interpreted as [BEGIN+1, END]. -Similarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below.

Insertion issues

  • END = BEGIN for 6/165
  • END = BEGIN+2 for 93/165
  • END = BEGIN+3 for 11/165
  • END = BEGIN+4 for 11/165
  • END – BEGIN ranges from 5 to 1156 for others.

Converting VCF svTypes to SO sequence alterations

The svType will be captured in our JSON file under the sequenceAlteration key. Here's the translation we'll use according to svType in 1000 Genomes.

svTypeAlternative Alleles contain <CN*>sequenceAlteration
ALUFALSEmobile_element_insertion
DUPTRUEcopy_number_gain
CNVTRUEcopy_number_gain (observed_gains >0 and observed_losses =0)
copy_number_loss (observed_gains = 0 and observed_losses > 0)
copy_number_variation (otherwise)
DELTRUEcopy_number_loss
LINE1FALSEmobile_element_insertion
SVAFALSEmobile_element_insertion
INVFALSEinversion
INSFALSEinsertion

Exceptions

We discard structural variants without END

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
21 9495848 esv3646347 A <INS:ME:LINE1> 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0

CNVs in chrY

  • No other types of structural variants exist in chrY
  • Since the copy number is provided in the genotype field, we directly parse the copy number from the "CN" field.
  • For most CNVs in chrY, the reference copy number is 1, but the reference copy number for CNVs in segmental duplication sites is 2 (<CN2> in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00101 HG00103 HG00105 HG00107 HG00108
Y 2888555 CNV_Y_2888555_3014661 T <CN2> 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394
Y 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C <CN1>,<CN3> 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99

JSON Output

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Admixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.
- - - - \ No newline at end of file diff --git a/3.17/data-sources/amino-acid-conservation-json/index.html b/3.17/data-sources/amino-acid-conservation-json/index.html deleted file mode 100644 index 61da2eee..00000000 --- a/3.17/data-sources/amino-acid-conservation-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -amino-acid-conservation-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

amino-acid-conservation-json

"aminoAcidConservation": {
"scores": [0.34]
}
FieldTypeNotes
aminoAcidConservationobject
scoresobject array of doublespercent conserved with respect to human amino acid residue. Range: 0.01 - 1.00
- - - - \ No newline at end of file diff --git a/3.17/data-sources/amino-acid-conservation/index.html b/3.17/data-sources/amino-acid-conservation/index.html deleted file mode 100644 index c8b295ae..00000000 --- a/3.17/data-sources/amino-acid-conservation/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -Amino Acid Conservation | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Amino Acid Conservation

Overview

Amino acid conservation scores are obtained from multiple alignments of vertebrate exomes to the human ones. The score indicates the frequency with which a particular amino acid (AA) observed in humans is also observed in the aligned species.

Publication

Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. Genome Res. 2005 Aug;15(8):1034-50. (http://www.genome.org/cgi/doi/10.1101/gr.3715005)

FASTA File

The exon alignments are provided in FASTA files as follows:

>ENST00000641515.2_hg38_1_2 3 0 0 chr1:65565-65573+
MKK
>ENST00000641515.2_panTro4_1_2 3 0 0 chrUn_GL393541:146907-146915+
MKK
>ENST00000641515.2_gorGor3_1_2 3 0 0
---
>ENST00000641515.2_ponAbe2_1_2 3 0 0 chr15:99141417-99141425-
MKK
>ENST00000641515.2_hg38_2_2 324 0 0 chr1:69037-70008+
VTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ
>ENST00000641515.2_panTro4_2_2 324 0 0 chrUn_GL393541:151333-152303+

Parsing FASTA

For each Ensembl transcript, we will need to aggregate all the exons together for each of the 100 species. From there, we should get a full alignment that can be used to determine conservation. For example, for ENST00000641515.2 we have:

Human (hg38) MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Chimp MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFL-MLFFVFYGGIVFGNLLIVRIVVSDSHLHSPMYFLLANLSLIDLSLCSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Gorilla ----------------------------------------------------------------------------------------------------------------------
Orangutan MKKVTAEAISWNESTSKTNNSVVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVIIVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Gibbon ----------------------------------------------------------------------------------------------------------------------
Rhesus MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVVDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL
Macaque MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVIDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL

If we look at position 6, we see that humans have an Alanine (A) residue. This residue is shared by Chimp and Orangutan. However, Rhesus and Macaque have a Glutamic acid (E) residue at that position. Moreover, Gorilla and Gibbon don't even have data for that transcript. For position 6, we would say that we have 43% conservation (3/7) since three of the seven organisms (Human, Chimp and Orangutan) carry the human residue.

Assigning scores to Nirvana transcripts

The source FASTA file comes with Ensembl/UCSC transcript ids of the transcripts used for alignments. The Nirvana cache has RefSeq and Ensembl transcripts and our first attempt was to map the given Ensembl/UCSC ids to their equivalent RefSeq/Ensembl ids. This attempt was unsuccessful since UCSC Table Browser provided mapping without version numbers. So we proceeded as follows:

  • Take proteins which have a unique mapping (and hence one set of conservation scores). For ones that mapped to both ChrX and ChrY, we accepted the one from ChrX.
  • A Nirvana transcript having an exact peptide sequence match with a uniquely aligned protein is assigned the corresponding conservation scores.

Unfortunately this left us with a very small number of transcripts having conservation scores.

GRCh37

  • Source FASTA contained 41957 protein alignments.
  • 38165 proteins had unique scores.
  • 88 aligned proteins existed in Nirvana cache.
  • 118 transcripts had conservation scores.

GRCh38

  • Source FASTA contained 110024 protein alignments.
  • 88961 proteins had unique scores.
  • 11688 aligned proteins existed in Nirvana cache.
  • 12098 transcripts had conservation scores.

Download URL

GRCh37: http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz

GRCh38: http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz

JSON Output

Conservation scores are reported in the transcript section. One score is reported for each alt allele.

"aminoAcidConservation": {
"scores": [0.34]
}
FieldTypeNotes
aminoAcidConservationobject
scoresobject array of doublespercent conserved with respect to human amino acid residue. Range: 0.01 - 1.00
- - - - \ No newline at end of file diff --git a/3.17/data-sources/clingen-dosage-json/index.html b/3.17/data-sources/clingen-dosage-json/index.html deleted file mode 100644 index d3493548..00000000 --- a/3.17/data-sources/clingen-dosage-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-dosage-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

clingen-dosage-json

"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
FieldTypeNotes
clingenDosageSensitivityMapobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
haploinsufficiencystringsee possible values below
triplosensitivitystring(same as haploinsufficiency) 
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).
annotationOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate that 57% of the annotation is overlapped by the variant. Specified up to 5 decimal places (Not reported for Insertions).

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely
- - - - \ No newline at end of file diff --git a/3.17/data-sources/clingen-gene-validity-json/index.html b/3.17/data-sources/clingen-gene-validity-json/index.html deleted file mode 100644 index 8c130f40..00000000 --- a/3.17/data-sources/clingen-gene-validity-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-gene-validity-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

clingen-gene-validity-json

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
FieldTypeNotes
clingenGeneValidityobject array
diseaseIdstringMonarch Disease Ontology ID (MONDO)
diseasestringdisease label
classificationstringsee below for possible values
classificationDatestringyyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
- - - - \ No newline at end of file diff --git a/3.17/data-sources/clingen-json/index.html b/3.17/data-sources/clingen-json/index.html deleted file mode 100644 index 0b41b172..00000000 --- a/3.17/data-sources/clingen-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

clingen-json

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
FieldTypeNotes
clingenobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
variantTypestringAny of the sequence alterations defined here.
idstringIdentifier from the data source. Alternatively a VID
clinicalInterpretationstringsee possible values below
observedGainsintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
observedLossesintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
validatedboolean
phenotypesstring arrayDescription of the phenotype.
phenotypeIdsstring arrayDescription of the phenotype IDs.
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain
- - - - \ No newline at end of file diff --git a/3.17/data-sources/clingen/index.html b/3.17/data-sources/clingen/index.html deleted file mode 100644 index 4beee218..00000000 --- a/3.17/data-sources/clingen/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -ClinGen | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

ClinGen

Overview

ClinGen is a National Institutes of Health (NIH)-funded resource dedicated to building a central resource that defines the clinical relevance of genes and variants for use in precision medicine and research.

Publication

Heidi L. Rehm, Ph.D., Jonathan S. Berg, M.D., Ph.D., Lisa D. Brooks, Ph.D., Carlos D. Bustamante, Ph.D., James P. Evans, M.D., Ph.D., Melissa J. Landrum, Ph.D., David H. Ledbetter, Ph.D., Donna R. Maglott, Ph.D., Christa Lese Martin, Ph.D., Robert L. Nussbaum, M.D., Sharon E. Plon, M.D., Ph.D., Erin M. Ramos, Ph.D., Stephen T. Sherry, Ph.D., and Michael S. Watson, Ph.D., for ClinGen. ClinGen The Clinical Genome Resource. N Engl J Med 2015; 372:2235-2242 June 4, 2015 DOI: 10.1056/NEJMsr1406261.

ISCA Regions

TSV Extraction

ClinGen contains only copy number variants. Since the coordinates in the original ClinGen file follow the same convention as the BED format (0-based start), the coordinates had to be adjusted to [BEGIN+1, END].

#bin    chrom   chromStart      chromEnd        name    score   strand  thickStart      thickEnd        attrCount       attrTags        attrVals
nsv530705 1 564405 8597804 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv530706 1 564424 3262790 0 1 copy_number_loss pathogenic False Abnormal facial shape,Abnormality of cardiac morphology,Global developmental delay,Muscular hypotonia HP:0001252,HP:0001263,HP:0001627,HP:0001999,MedGen:CN001147,MedGen:CN001157,MedGen:CN001482,MedGen:CN001810
nsv530707 1 564424 7068738 0 1 copy_number_loss pathogenic False Abnormality of cardiac morphology,Cleft upper lip,Failure to thrive,Global developmental delay,Intrauterine growth retardation,Microcephaly,Short stature HP:0000204,HP:0000252,HP:0001263,HP:0001508,HP:0001511,HP:0001627,HP:0004322,MedGen:C0349588,MedGen:C1845868,MedGen:C1853481,MedGen:C2364119,MedGen:CN000197,MedGen:CN001157,MedGen:CN001482
nsv533512 1 564435 649748 0 1 copy_number_loss benign False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv931338 1 714078 4958499 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv530300 1 728138 5066371 1 0 copy_number_gain pathogenic False Abnormality of cardiac morphology,Cleft palate,Global developmental delay HP:0000175,HP:0001263,HP:0001627,MedGen:C2240378,MedGen:CN001157,MedGen:CN001482

Status levels

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain

Parsing

We parse the ClinGen tsv file and extract the following:

  • chrom
  • chromStart (note this is a 0-based coordinate)
  • chromEnd
  • attrTags
  • attrVals

attrTags and attrVals are comma separated lists. attrTags contains the field keys and attrVals contains the field values. We will parse the following keys from the two fields:

  • parent (this will be used as the ID in our JSON output)
  • clinical_int
  • validated
  • phenotype (this should be a string array)
  • phenotype_id (this should be a string array)

Observed losses and observed gains will be calculated from entries that share a common parent ID.

  • variants with a common parent ID and same coordinates are grouped
    • calculated observed losses, observed gains for each group
    • Clinical significance and validation status are collapsed using the priority strategy described below
  • Variants with the same parent ID can have different coordinates (mapped to hg38)
    • nsv491508 : chr14:105583663-106881350 and chr14:105605043-106766076 (only one example)
    • we kept both variants

Conflict Resolution

Clinical significance priority

When there is a mixture of variants belonging to the same parent ID, we will choose the most pathogenic clinical significance from the available values, e.g. if 3 samples were deemed pathogenic and 2 samples were likely pathogenic, we would list the variant as pathogenic.

Priority (high to low)

  • Priority
  • Pathogenic
  • Likely pathogenic
  • Benign
  • Likely benign
  • Uncertain significance

Validation Priority

When there is a mixture of variants belonging to the same parent ID, we will set the validation status to true if any of the variants were validated.

Download URL

https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite

JSON Output

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
FieldTypeNotes
clingenobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
variantTypestringAny of the sequence alterations defined here.
idstringIdentifier from the data source. Alternatively a VID
clinicalInterpretationstringsee possible values below
observedGainsintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
observedLossesintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
validatedboolean
phenotypesstring arrayDescription of the phenotype.
phenotypeIdsstring arrayDescription of the phenotype IDs.
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain

Dosage Sensitivity Map

The Clinical Genome Resource (ClinGen) consortium is curating genes and regions of the genome to assess whether there is evidence to support that these genes/regions are dosage sensitive and should be targeted on a cytogenomic array. Nirvana reports these annotations for overlapping SVs.

Publication

Riggs ER, Nelson T, Merz A, Ackley T, Bunke B, Collins CD, Collinson MN, Fan YS, Goodenberger ML, Golden DM, Haglund-Hazy L, Krgovic D, Lamb AN, Lewis Z, Li G, Liu Y, Meck J, Neufeld-Kaiser W, Runke CK, Sanmann JN, Stavropoulos DJ, Strong E, Su M, Tayeh MK, Kokalj Vokac N, Thorland EC, Andersen E, Martin CL. Copy number variant discrepancy resolution using the ClinGen dosage sensitivity map results in updated clinical interpretations in ClinVar. Hum Mutat. 2018 Nov;39(11):1650-1659. doi: 10.1002/humu.23610. PMID: 30095202; PMCID: PMC7374944.

TSV Source files

Regions

#ClinGen Region Curation Results
#07 May,2019
#Genomic Locations are reported on GRCh38 (hg38): GCF_000001405.36
#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen
#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_region.cgi?id=key
#ISCA ID ISCA Region Name cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID
ISCA-46299 Xp11.22 region (includes HUWE1) Xp11.22 tbd 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 22840365 20655035 26692240 2018-11-19
ISCA-46295 15q13.3 recurrent region (D-CHRNA7 to BP5) (includes CHRNA7 and OTUD7A) 15q13.3 chr15:31727418-32153204 3 Sufficient evidence for dosage pathogenicity 19898479 20236110 22775350 40 Dosage sensitivity unlikely 26968334 22420048 2018-05-10
ISCA-46291 7q11.23 recurrent distal region (includes HIP1, YWHAG) 7q11.23 chr7:75528718-76433859 2 Some evidence for dosage pathogenicity 21109226 16971481 1 Little evidence for dosage pathogenicity 21109226 27867344 2018-12-31
ISCA-46290 Xp11.22p11.23 recurrent region (includes SHROOM4) Xp11.22-p11.23 chrX: 48447780-52444264 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 19716111 21418194 25425167 2017-12-14 300801

Genes

#ClinGen Gene Curation Results
#24 May,2019
#Genomic Locations are reported on GRCh37 (hg19): GCF_000001405.13
#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen
#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_gene.cgi?sym=Gene Symbol
#Gene Symbol Gene ID cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID
A4GALT 53947 22q13.2 chr22:43088121-43117307 30 Gene associated with autosomal recessive phenotype 0 No evidence available 2014-12-11 111400
AAGAB 79719 15q23 chr15:67493013-67547536 3 Sufficient evidence for dosage pathogenicity 23064416 23000146 0 No evidence available 2013-02-28 148600

Dosage Rating System

RatingPossible Clinical Interpretation
0No evidence to suggest that dosage sensitivity is associated with clinical phenotype
1Little evidence suggesting dosage sensitivity is associated with clinical phenotype
2Emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
3Sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
30Gene associated with autosomal recessive phenotype
40Dosage sensitivity unlikely

Reference: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml

Download URL

ftp://ftp.clinicalgenome.org/

JSON Output

"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
FieldTypeNotes
clingenDosageSensitivityMapobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
haploinsufficiencystringsee possible values below
triplosensitivitystring(same as haploinsufficiency) 
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).
annotationOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate that 57% of the annotation is overlapped by the variant. Specified up to 5 decimal places (Not reported for Insertions).

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely

Gene-Disease Validity

The ClinGen Gene-Disease Clinical Validity curation process involves evaluating the strength of evidence supporting or refuting a claim that variation in a particular gene causes a particular disease. Nirvana reports these annotations for genes in the genes section of the JSON.

Publication

Strande NT, Riggs ER, Buchanan AH, et al. Evaluating the Clinical Validity of Gene-Disease Associations: An Evidence-Based Framework Developed by the Clinical Genome Resource. Am J Hum Genet. 2017;100(6):895-906. doi:10.1016/j.ajhg.2017.04.015

Source TSV

The source data comes in a CSV file that we convert to a TSV as follows:

CLINGEN GENE VALIDITY CURATIONS
FILE CREATED: 2019-05-28
WEBPAGE: https://search.clinicalgenome.org/kb/gene-validity
+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++
GENE SYMBOL,GENE ID (HGNC),DISEASE LABEL,DISEASE ID (MONDO),SOP,CLASSIFICATION,ONLINE REPORT,CLASSIFICATION DATE
+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++
A2ML1,HGNC:23336,Noonan syndrome with multiple lentigines,MONDO_0007893,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/59b87033-dd91-4f1e-aec1-c9b1f5124b16--2018-06-07T14:37:47,2018-06-07T14:37:47.175Z
A2ML1,HGNC:23336,cardiofaciocutaneous syndrome,MONDO_0015280,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/fc3c41d8-8497-489b-a350-c9e30016bc6a--2018-06-07T14:31:03,2018-06-07T14:31:03.696Z
A2ML1,HGNC:23336,Costello syndrome,MONDO_0009026,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/ea72ba8d-cf62-44bc-86be-da64e3848eba--2018-06-07T14:34:05,2018-06-07T14:34:05.324Z

Download URL

https://search.clinicalgenome.org/kb/gene-validity.csv

Conflict Resolution

Multiple Classifications

Here is an example of multiple classifications.

$ grep MONDO_0010192 ClinGen-Gene-Disease-Summary-2019-12-02.csv  | grep EDNRB
EDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Moderate,https://search.clinicalgenome.org/kb/gene-validity/d7abbd45-7915-437b-849b-dea876bfc2f5--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z
EDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Limited,https://search.clinicalgenome.org/kb/gene-validity/73ee9727-60c1-40fd-830f-08c2b513d2ee--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z

In such cases, we select the more severe classification.

Multiple Dates

$ grep MONDO_0016419 ClinGen-Gene-Disease-Summary-2019-12-02.csv  | grep MUTYH
MUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9904,2017-05-24T00:00:00
MUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9902,2017-05-25T00:00:00

If the classifications are the same, we should select the latest classification date.

JSON Output

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
FieldTypeNotes
clingenGeneValidityobject array
diseaseIdstringMonarch Disease Ontology ID (MONDO)
diseasestringdisease label
classificationstringsee below for possible values
classificationDatestringyyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
- - - - \ No newline at end of file diff --git a/3.17/data-sources/clinvar-json/index.html b/3.17/data-sources/clinvar-json/index.html deleted file mode 100644 index 33b3a106..00000000 --- a/3.17/data-sources/clinvar-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clinvar-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

clinvar-json

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity
- - - - \ No newline at end of file diff --git a/3.17/data-sources/clinvar/index.html b/3.17/data-sources/clinvar/index.html deleted file mode 100644 index 8ce2d259..00000000 --- a/3.17/data-sources/clinvar/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -ClinVar | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

ClinVar

Overview

ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation.

Publication

Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, Nucleic Acids Research, 46, Issue D1, 4 January 2018, Pages D1062–D1067, https://doi.org/10.1093/nar/gkx1153

RCV File

Example

Here's a full RCV entry.

Parsing

In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output.

ID

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinVarAccession Acc="RCV000000001" Version="2">
</ClinVarSet>

The Acc and Version fields are merged to form the ID (RCV000000001.2)

LastUpdatedDate

<ClinVarSet>
<ReferenceClinVarAssertion DateCreated="2012-08-13" DateLastUpdated="2016-02-17" ID="57604" >
</ClinVarSet>

Significance

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

ReviewStatus

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

Phenotypes

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="62">
<Trait Type="Disease">
<Name>
<ElementValue Type="Preferred">Joubert syndrome 9</ElementValue>
</Name>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

We only use the field with Type="Preferred". Multiple phenotypes may be reported

Location and Variant Id

<ReferenceClinVarAssertion>
<GenotypeSet Type="CompoundHeterozygote" ID="424709">
<MeasureSet Type="Variant" ID="81">
<Measure Type="single nucleotide variant" ID="15120">
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38"
AssemblyStatus="current" Chr="10" Accession="NC_000010.11" start="89222510"
stop="89222510" display_start="89222510" display_stop="89222510" variantLength="1"
positionVCF="89222510" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25"
AssemblyStatus="previous" Chr="10" Accession="NC_000010.10" start="90982267"
stop="90982267" display_start="90982267" display_stop="90982267" variantLength="1"
positionVCF="90982267" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
</Measure>
</MeasureSet>
</GenotypeSet>
</ReferenceClinVarAssertion>
  • The variant position is extracted from the fields for their respective assemblies.
  • Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant.
  • For older records, since the "start" and "stop" fields are not always available, we use the "display_start" and "display_stop" fields.
  • If a required allele is not available, we extract it from the reference sequence.
  • Only variants having a dbSNP id are extracted.
  • Note that a ClinVar accession may have multiple variants associated with it (possibly in different locations).
  • VariantId is extracted from the MeasureSet attributes.

MedGen, OMIM, Orphanet IDs

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="175">
<Trait ID="3036" Type="Disease">
<XRef ID="C0086651" DB="MedGen"/>
<XRef ID="309297" DB="Orphanet"/>
<XRef ID="582" DB="Orphanet"/>
<XRef Type="MIM" ID="253000" DB="OMIM"/>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

AlleleOrigins

<ClinVarAssertion>
<Origin>germline</Origin>
</ClinVarAssertion>

All Allele Origins are extracted, but only from Submission (SCV) entries.

PubMedIds

<ClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<Citation Type="general">
<ID Source="PubMed">12114475</ID>
</Citation>
</ClinicalSignificance>
<AttributeSet>
<Attribute Type="AssertionMethod">LMM Criteria</Attribute>
<Citation>
<ID Source="PubMed">24033266</ID>
</Citation>
</AttributeSet>
<ObservedIn>
<ObservedData ID="9727445">
<Citation Type="general">
<ID Source="PubMed">9113933</ID>
</Citation>
</ObservedData>
</ObservedIn>
<Citation Type="general">
<ID Source="PubMed">23757202</ID>
</Citation>
</ClinVarAssertion>

All PubMed IDs are extracted, but only from Submission (SCV) entries.

Parsing Significance

Extracting significance(s) may involve parsing multiple fields. Take the following snippets into consideration.

<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2016-10-13">
<ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus>
<Description>Pathogenic/Likely pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2012-06-07">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Conflicting interpretations of pathogenicity</Description>
<Explanation DataSource="ClinVar" Type="public">Pathogenic(1);Uncertain significance(1)</Explanation>
</ClinicalSignificance>

Given the evidence, we converted the significance field into an array of strings which may be parsed out of the Description or Explanation fields.

Varying Delimiters

The delimiters in each field may vary. Currently, the delimiters for Description are , and /. The delimiters for Explanation are ; and /.

VCV File

Example

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<ClinVarVariationRelease xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd" ReleaseDate="2019-12-31">
<VariationArchive VariationID="431749" VariationName="GRCh37/hg19 1p36.31(chr1:6051187-6158763)" VariationType="copy number gain" DateCreated="2017-08-12" DateLastUpdated="2019-09-10" Accession="VCV000431749" Version="1" RecordType="included" NumberOfSubmissions="0" NumberOfSubmitters="0">
<RecordStatus>current</RecordStatus>
<Species>Homo sapiens</Species>
<IncludedRecord>
<SimpleAllele AlleleID="425239" VariationID="431749">
<GeneList>
<Gene Symbol="KCNAB2" FullName="potassium voltage-gated channel subfamily A regulatory beta subunit 2" GeneID="8514" HGNC_ID="HGNC:6229" Source="calculated" RelationshipType="genes overlapped by variant">
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="1" Accession="NC_000001.11" start="5992639" stop="6101186" display_start="5992639" display_stop="6101186" Strand="+"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="6052357" stop="6161252" display_start="6052357" display_stop="6161252" Strand="+"/>
</Location>
<OMIM>601142</OMIM>
</Gene>
<Gene Symbol="NPHP4" FullName="nephrocystin 4" GeneID="261734" HGNC_ID="HGNC:19104" Source="calculated" RelationshipType="genes overlapped by variant">
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="1" Accession="NC_000001.11" start="5862810" stop="5992425" display_start="5862810" display_stop="5992425" Strand="-"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="5922869" stop="6052532" display_start="5922869" display_stop="6052532" Strand="-"/>
</Location>
<OMIM>607215</OMIM>
</Gene>
</GeneList>
<Name>GRCh37/hg19 1p36.31(chr1:6051187-6158763)</Name>
<VariantType>copy number gain</VariantType>
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" forDisplay="true" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="6051187" stop="6158763" display_start="6051187" display_stop="6158763"/> </Location>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
<XRefList>
<XRef Type="Interpreted" ID="431733" DB="ClinVar"/>
</XRefList>
</SimpleAllele>
<ReviewStatus>no interpretation for the single variant</ReviewStatus>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
<SubmittedInterpretationList>
<SCV Title="SUB1895145" Accession="SCV000296057" Version="1"/>
</SubmittedInterpretationList>
<InterpretedVariationList>
<InterpretedVariation VariationID="431733" Accession="VCV000431733" Version="1"/>
</InterpretedVariationList>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

Parsing

In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output.

id

<VariationArchive VariationID="431749" VariationName="GRCh37/hg19 1p36.31(chr1:6051187-6158763)" VariationType="copy number gain" DateCreated="2017-08-12" DateLastUpdated="2019-09-10" Accession="VCV000431749" Version="1" RecordType="included" NumberOfSubmissions="0" NumberOfSubmitters="0">

The Accession and Version fields are merged to form the ID (VCV000431749.1)

significance

<ClinVarVariationRelease>
<VariationArchive>
<IncludedRecord>
<SimpleAllele>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
</SimpleAllele>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

May have multiple significances listed.

reviewStatus

<ClinVarVariationRelease>
<VariationArchive>
<IncludedRecord>
<ReviewStatus>no interpretation for the single variant</ReviewStatus>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

Known Issues

Known Issues
  • The XML file contains ~1k more entries (out of 162K) than the VCF file
  • The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF
  • The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H", etc.) as their alternate allele

Download URLs

ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz

https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz

JSON Output

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity

Building the supplementary files

The ClinVar .nsa for Nirvana can be built using the SAUtils command's clinvar subcommand.

Source data files

Two input .xml files and a .version file are required in order to build the .nsa file. You should have the following files:

ClinVarFullRelease_2021-06.xml.gz       ClinVarVariationRelease_2021-06.xml.gz
ClinVarFullRelease_2021-06.xml.gz.version

The version file is a text file with the following format:

NAME=ClinVar
VERSION=20210603
DATE=2021-06-03
DESCRIPTION=A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence

The help menu for the utility is as follows:

dotnet SAUtils.dll clinvar
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll clinvar [options]
Creates a supplementary database with ClinVar annotations

OPTIONS:
--ref, -r <VALUE> compressed reference sequence file
--rcv, -i <VALUE> ClinVar Full release XML file
--vcv, -c <VALUE> ClinVar Variation release XML file
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

dotnet SAUtils.dll clinvar

Here is a sample execution:

dotnet ~/development/Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll clinvar \\
--ref ~/development/References/7/Homo_sapiens.GRCh38.Nirvana.dat --rcv ClinVarFullRelease_2021-06.xml.gz \\
--vcv ClinVarVariationRelease_2021-06.xml.gz --out ~/development/SupplementaryDatabase/63/GRCh38
---------------------------------------------------------------------------
SAUtils (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.13.0
---------------------------------------------------------------------------

Found 983417 VCV records
Chromosome 1 completed in 00:09:46.2
Chromosome 2 completed in 00:00:16.4
Chromosome 3 completed in 00:00:06.9
Unknown vcv id:982521 found in RCV001262095.1
Chromosome 4 completed in 00:00:03.9
Chromosome 5 completed in 00:00:07.1
Chromosome 6 completed in 00:00:05.7
Chromosome 7 completed in 00:00:06.6
Unknown vcv id:430873 found in RCV000493222.1
Chromosome 8 completed in 00:00:04.6
Chromosome 9 completed in 00:00:06.2
Chromosome 10 completed in 00:00:05.6
Chromosome 11 completed in 00:00:10.2
Chromosome 12 completed in 00:00:06.9
Chromosome 13 completed in 00:00:05.9
Chromosome 14 completed in 00:00:04.9
Chromosome 15 completed in 00:00:05.4
Chromosome 16 completed in 00:00:08.9
Chromosome 17 completed in 00:00:13.1
Chromosome 18 completed in 00:00:02.4
Chromosome 19 completed in 00:00:07.6
Chromosome 20 completed in 00:00:02.4
Chromosome 21 completed in 00:00:01.6
Chromosome 22 completed in 00:00:02.6
Chromosome MT completed in 00:00:00.3
Chromosome X completed in 00:00:05.5
2 unknown VCVs found in RCVs.
982521,430873
Chromosome Y completed in 00:00:00.0

Time: 00:12:08.2

- - - - \ No newline at end of file diff --git a/3.17/data-sources/cosmic-json/index.html b/3.17/data-sources/cosmic-json/index.html deleted file mode 100644 index 941b6515..00000000 --- a/3.17/data-sources/cosmic-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -cosmic-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

cosmic-json

   "cosmicGeneFusions":[
{
"id":"COSF881",
"numSamples":6,
"geneSymbols":[
"MYB",
"NFIB"
],
"hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",
"histologies":[
{
"name":"adenoid cystic carcinoma",
"numSamples":6
}
],
"sites":[
{
"name":"salivary gland (submandibular)",
"numSamples":1
},
{
"name":"salivary gland (parotid)",
"numSamples":1
},
{
"name":"salivary gland (nasal cavity)",
"numSamples":1
},
{
"name":"breast",
"numSamples":3
}
],
"pubMedIds":[
19841262
]
}
]
FieldTypeNotes
idstringCOSMIC fusion ID
numSamplesint
geneSymbolsstring array5' gene & 3' gene
hgvsrstringHGVS RNA translocation fusion notation
histologiescount arrayphenotypic descriptions
sitescount arraytissue types
pubMedIdsint arrayPubMed IDs

Count

FieldTypeNotes
namestringdescription
numSamplesint
- - - - \ No newline at end of file diff --git a/3.17/data-sources/cosmic/index.html b/3.17/data-sources/cosmic/index.html deleted file mode 100644 index f2549fea..00000000 --- a/3.17/data-sources/cosmic/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -COSMIC | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

COSMIC

Overview

COSMIC, the Catalogue of Somatic Mutations in Cancer, is the world's largest source of expert manually curated somatic mutation information relating to human cancers.

Publication

John G Tate, Sally Bamford, Harry C Jubb, Zbyslaw Sondka, David M Beare, Nidhi Bindal, Harry Boutselakis, Charlotte G Cole, Celestino Creatore, Elisabeth Dawson, Peter Fish, Bhavana Harsha, Charlie Hathaway, Steve C Jupe, Chai Yin Kok, Kate Noble, Laura Ponting, Christopher C Ramshaw, Claire E Rye, Helen E Speedy, Ray Stefancsik, Sam L Thompson, Shicai Wang, Sari Ward, Peter J Campbell, Simon A Forbes. (2019) COSMIC: the Catalogue Of Somatic Mutations In Cancer, Nucleic Acids Research, Volume 47, Issue D1

Licensed Content

Commercial companies are required to acquire a license from COSMIC. At the moment, this means that our COSMIC content is only available in Illumina's products and services, not in the open source distribution.

Since many of you are academic users, we will enable a COSMIC login in our downloader later this year that will allow academic and commercial organizations (with a license) to access our COSMIC data sources.

Gene Fusions

Gene fusions are manually curated from peer reviewed publications by expert COSMIC curators. A comprehensive literature curation is completed for each fusion pair when it is released in the database. Currently COSMIC includes information on fusions involved in solid tumours and leukaemias.

TSV File

Example

SAMPLE_ID       SAMPLE_NAME     PRIMARY_SITE    SITE_SUBTYPE_1  SITE_SUBTYPE_2  SITE_SUBTYPE_3  PRIMARY_HISTOLOGY      HISTOLOGY_SUBTYPE_1      HISTOLOGY_SUBTYPE_2     HISTOLOGY_SUBTYPE_3     FUSION_ID       TRANSLOCATION_NAME      5'_CHROMOSOME   5'_STRAND       5'_GENE_ID      5'_GENE_NAME    5'_LAST_OBSERVED_EXON   5'_GENOME_START_FROM    5'_GENOME_START_TO      5'_GENOME_STOP_FROM     5'_GENOME_STOP_TO       3'_CHROMOSOME   3'_STRAND       3'_GENE_ID      3'_GENE_NAME   3'_FIRST_OBSERVED_EXON   3'_GENOME_START_FROM    3'_GENOME_START_TO      3'_GENOME_STOP_FROM     3'_GENOME_STOP_TO      FUSION_TYPE      PUBMED_PMID
749711 HCC1187 breast NS NS NS carcinoma ductal_carcinoma NS NS 665 ENST00000360863.10(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452 8 - 197199 RGS22 22 99981937 99981937 100106116 100106116 1 + 212470 SYCP1_ENST00000369518 24 114944339 114944339 114995367 114995367 Inferred Breakpoint 20033038

Parsing

From the TSV file, we're mainly interested in the following columns:

  • SAMPLE_ID
  • PRIMARY_SITE
  • PRIMARY_HISTOLOGY
  • HISTOLOGY_SUBTYPE_1
  • FUSION_ID
  • TRANSLOCATION_NAME
  • PUBMED_PMID
info

For all the histologies and sites, we replace all the underscores with spaces. For example, salivary_gland would become salivary gland.

Aggregation

To create the gene fusion entries in Nirvana, we perform the following on each row in the TSV file:

  • Group all entries by FUSION_ID
  • Using all the entries related to this FUSION_ID:
    • Collect all the PubMed IDs
    • Tally the number of observed sample IDs
    • Grab the HGVS r. notation (should not change throughout the FUSION_ID)
    • Tally the number of samples observed for each histology
    • Tally the number of samples observed for each site
  • Extract the transcript IDs from the HGVS notation and lookup the associated gene symbols

Fixing the HGVS RNA Notation

ENST00000360863.6(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452

There are some issues with the HGVS RNA notation:

  • The two transcripts should be linked by a double colon ::.
  • For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusions.
  • If only the breakpoint is truly known, the recommendation is to use ? marks

We chose to only update the linkage between each transcript using double colons ::. While we could have recalculated the HGVS notation using the supplied breakpoints, we chose not to because the resulting notation would be quite different from the original material. This would potentially lead to some confusion.

Aggregating Histologies

For histologies we want to capture the most specific description available. In the example above, we saw that the primary histology was carcinoma, but the subtype was ductal carcinoma. In this case we would use the subtype for the annotation.

COSMIC uses NS to show that a value is empty. If the subtype is NS, we will use the primary histology instead.

Aggregating Sites

For sites, we observe that the subtype provides additional description but is still dependent on the primary site value. For example, the primary site might be skin, but the subtype is foot. Therefore, we will combine the values in the following manner: skin (foot).

Known Issues

Known Issues

There are some issues with the HGVS RNA notation:

  • The two transcripts should be linked by a double colon ::. We fixed this aspect in Nirvana.
  • For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusions.

Download URL

JSON Output

   "cosmicGeneFusions":[
{
"id":"COSF881",
"numSamples":6,
"geneSymbols":[
"MYB",
"NFIB"
],
"hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",
"histologies":[
{
"name":"adenoid cystic carcinoma",
"numSamples":6
}
],
"sites":[
{
"name":"salivary gland (submandibular)",
"numSamples":1
},
{
"name":"salivary gland (parotid)",
"numSamples":1
},
{
"name":"salivary gland (nasal cavity)",
"numSamples":1
},
{
"name":"breast",
"numSamples":3
}
],
"pubMedIds":[
19841262
]
}
]
FieldTypeNotes
idstringCOSMIC fusion ID
numSamplesint
geneSymbolsstring array5' gene & 3' gene
hgvsrstringHGVS RNA translocation fusion notation
histologiescount arrayphenotypic descriptions
sitescount arraytissue types
pubMedIdsint arrayPubMed IDs

Count

FieldTypeNotes
namestringdescription
numSamplesint
- - - - \ No newline at end of file diff --git a/3.17/data-sources/dbsnp-json/index.html b/3.17/data-sources/dbsnp-json/index.html deleted file mode 100644 index 791fa3cc..00000000 --- a/3.17/data-sources/dbsnp-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbsnp-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

dbsnp-json

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.17/data-sources/dbsnp/index.html b/3.17/data-sources/dbsnp/index.html deleted file mode 100644 index 4575571f..00000000 --- a/3.17/data-sources/dbsnp/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbSNP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

dbSNP

Overview

dbSNP contains human single nucleotide variations, microsatellites, and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations.

Publication

Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP—Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. Genome Res., 9, 677–679.

VCF File

Example

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 10177 rs367896724 A AC . . RS=367896724;RSPOS=10177;dbSNPBuildID=138; \
SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \
VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \
TOPMED=0.76728147298674821,0.23271852701325178

Parsing

From the VCF file, we're mainly interested in the following:

  • rsID from the ID field
  • CAF from the INFO field

Global allele extraction

The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values).

Tie Breaking: Global Major Allele

If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele.

Tie Breaking: Global Minor Allele

If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily.

Equal Allele Frequency Example (2 alleles)

chr1    100 A   C   CAF=0.5,0.5

We will select A to be the global major allele and C to be the global minor allele.

Equal Allele Frequency Example (3 alleles)

chr1    100 A   C,T CAF=0.33,0.33,0.33

We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele.

Equal Allele Frequency in Alternate Alleles

chr1    100 A   C,T CAF=0.2,0.4,0.4

C and T will be arbitrarily assigned as the global major allele and the global minor allele (one each).

Equal Allele Frequency Between Reference & Alternate Allele

chr1    100 A   C,T CAF=0.2,0.2,0.6

We will select T to be the global major allele and C to be the global minor allele.

Known Issues

Known Issues

If there are multiple entries with different CAF values for the same allele, we use the first CAF value.

Download URL

https://ftp.ncbi.nih.gov/snp/organisms/

JSON Output

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.17/data-sources/fusioncatcher-json/index.html b/3.17/data-sources/fusioncatcher-json/index.html deleted file mode 100644 index ce5f40e3..00000000 --- a/3.17/data-sources/fusioncatcher-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -fusioncatcher-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

fusioncatcher-json

   "fusionCatcher":[
{
"genes":{
"first":{
"hgnc":"ETV6",
"isOncogene":true
},
"second":{
"hgnc":"RUNX1"
},
"isParalogPair":true,
"isPseudogenePair":true,
"isReadthrough":true
},
"germlineSources":[
"1000 Genomes Project"
],
"somaticSources":[
"COSMIC",
"TCGA oesophageal carcinomas"
]
}
]
FieldTypeNotes
genesgenes object5' gene & 3' gene
germlineSourcesstring arraymatches in known germline data sources
somaticSourcesstring arraymatches in known somatic data sources

genes

FieldTypeNotes
firstgene object5' gene
secondgene object3' gene
isParalogPairbooltrue when both genes are paralogs for each other
isPseudogenePairbooltrue when both genes are pseudogenes for each other
isReadthroughbooltrue when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)

gene

FieldTypeNotes
hgncstringgene symbol. e.g. MSH6
isOncogenebooltrue when this gene is an oncogene
- - - - \ No newline at end of file diff --git a/3.17/data-sources/fusioncatcher/index.html b/3.17/data-sources/fusioncatcher/index.html deleted file mode 100644 index a9ff761b..00000000 --- a/3.17/data-sources/fusioncatcher/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -FusionCatcher | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

FusionCatcher

Overview

FusionCatcher is a well-known tool that searches for somatic novel/known fusion genes, translocations, and/or chimeras in RNA-seq data. While FusionCatcher itself is not part of Nirvana, we have included a subset of their genomic databases in Nirvana.

Publication

Daniel Nicorici, Mihaela Şatalan, Henrik Edgren, Sara Kangaspeska, Astrid Murumägi, Olli Kallioniemi, Sami Virtanen, Olavi Kilkku. (2014) FusionCatcher – a tool for finding somatic fusion genes in paired-end RNA-sequencing data. bioRxiv 011650

Supported Data Sources

Oncogenes

The following data sources are aggregated and used to populate the isOncogene field in the gene JSON object:

DescriptionReferenceDataFusionCatcher filename
Bushmanbushmanlab.orgcancer_genes.txt
ONGENEJGGbioinfo-minzhao.orgoncogenes_more.txt
UniProt tumor genesNARuniprot.orgtumor_genes.txt

Germline

Nirvana labelReferenceDataFusionCatcher filename
1000 Genomes ProjectPLOS ONE1000genomes.txt
Healthy (strong support)banned.txt
Illumina Body Map 2.0EBIbodymap2.txt
CACGGenomicscacg.txt
ConjoinGPLOS ONEconjoing.txt
Healthy prefrontal cortexBMC Medical GenomicsNCBI GEOcortex.txt
Duplicated Genes DatabasePLOS ONEgenouest.orgdgd.txt
GTEx healthy tissuesgtexportal.orggtex.txt
Healthyhealthy.txt
Human Protein AtlasMCPEBIhpa.txt
Babiceanu non-cancer tissuesNARNARnon-cancer_tissues.txt
non-tumor cell linesnon-tumor_cells.txt
TumorFusions normalNARNARtcga-normal.txt

Somatic

Nirvana labelReferenceDataFusionCatcher filename
Alaei-Mahabadi 18 cancersPNAS18cancers.txt
DepMap CCLEdepmap.orgccle.txt
CCLE KlijnNature BiotechnologyNature Biotechnologyccle2.txt
CCLE VellichirammalMolecular Therapy Nucleic Acidsccle3.txt
Cancer Genome ProjectCOSMICcgp.txt
ChimerKB 4.0NARkobic.re.krchimerdb4kb.txt
ChimerPub 4.0NARkobic.re.krchimerdb4pub.txt
ChimerSeq 4.0NARkobic.re.krchimerdb4seq.txt
COSMICNARCOSMICcosmic.txt
Bao gliomasGenome Researchgliomas.txt
Knownknown.txt
Mitelman DBISB-CGCGoogle Cloudmitelman.txt
TCGA oesophageal carcinomasNatureoesophagus.txt
Bailey pancreatic cancersNatureNaturepancreases.txt
PCAWGCellICGCpcawg.txt
Robinson prostate cancersCellCellprostate_cancer.txt
TCGAcancer.govtcga.txt
TumorFusions tumorNARNARtcga-cancer.txt
TCGA GaoCellCelltcga2.txt
TCGA VellichirammalMolecular Therapy Nucleic Acidstcga3.txt
TICdbBMC Genomicsunav.eduticdb.txt

Gene Pair TSV File

Most of the data files in FusionCatcher are two-column TSV files containing the Ensembl gene IDs that are paired together.

Example

Here are the first few lines of the 1000genomes.txt file:

ENSG00000006210 ENSG00000102962
ENSG00000006652 ENSG00000181016
ENSG00000014138 ENSG00000149798
ENSG00000026297 ENSG00000071242
ENSG00000035499 ENSG00000155959
ENSG00000055211 ENSG00000131013
ENSG00000055332 ENSG00000179915
ENSG00000062485 ENSG00000257727
ENSG00000065978 ENSG00000166501
ENSG00000066044 ENSG00000104980

Parsing

In Nirvana, we will only import a gene pair if both Ensembl gene IDs are recognized from either our GRCh37 or GRCh38 cache files.

Gene TSV File

Some of the data files are single-column files containing Ensembl gene IDs. This is commonly used in the data files representing oncogene data sources.

Example

Here are the first few lines of the oncogenes_more.txt file:

ENSG00000000938
ENSG00000003402
ENSG00000005469
ENSG00000005884
ENSG00000006128
ENSG00000006453
ENSG00000006468
ENSG00000007350
ENSG00000008294
ENSG00000008952

Parsing

Known Issues

Known Issues

FusionCatcher also creates custom Ensembl genes (e.g. ENSG09000000002) to handle missing Ensembl genes. Nirvana will ignore these entries since we only include the gene IDs that are currently recognized by Nirvana.

I suspect that these were originally RefSeq genes and if so, we can support those directly in Nirvana in the future.

Download URL

https://sourceforge.net/projects/fusioncatcher/files/data

JSON Output

   "fusionCatcher":[
{
"genes":{
"first":{
"hgnc":"ETV6",
"isOncogene":true
},
"second":{
"hgnc":"RUNX1"
},
"isParalogPair":true,
"isPseudogenePair":true,
"isReadthrough":true
},
"germlineSources":[
"1000 Genomes Project"
],
"somaticSources":[
"COSMIC",
"TCGA oesophageal carcinomas"
]
}
]
FieldTypeNotes
genesgenes object5' gene & 3' gene
germlineSourcesstring arraymatches in known germline data sources
somaticSourcesstring arraymatches in known somatic data sources

genes

FieldTypeNotes
firstgene object5' gene
secondgene object3' gene
isParalogPairbooltrue when both genes are paralogs of each other
isPseudogenePairbooltrue when both genes are pseudogenes of each other
isReadthroughbooltrue when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)

gene

FieldTypeNotes
hgncstringgene symbol. e.g. MSH6
isOncogenebooltrue when this gene is an oncogene
- - - - \ No newline at end of file diff --git a/3.17/data-sources/gnomad-lof-json/index.html b/3.17/data-sources/gnomad-lof-json/index.html deleted file mode 100644 index 0b508f82..00000000 --- a/3.17/data-sources/gnomad-lof-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-lof-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

gnomad-lof-json

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)
- - - - \ No newline at end of file diff --git a/3.17/data-sources/gnomad-small-variants-json/index.html b/3.17/data-sources/gnomad-small-variants-json/index.html deleted file mode 100644 index c7933dc4..00000000 --- a/3.17/data-sources/gnomad-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

gnomad-small-variants-json

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.
- - - - \ No newline at end of file diff --git a/3.17/data-sources/gnomad/index.html b/3.17/data-sources/gnomad/index.html deleted file mode 100644 index 81a3e798..00000000 --- a/3.17/data-sources/gnomad/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomAD | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

gnomAD

Overview

The Genome Aggregation Database (gnomAD) is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community.

Publication

Koch, L., 2020. Exploring human genomic diversity with gnomAD. Nature Reviews Genetics, 21(8), pp.448-448.

Small Variants

VCF extraction

We currently extract the following info fields from gnomAD genome and exome VCF files:

##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate allele count for samples">
##INFO=<ID=AN,Number=A,Type=Integer,Description="Total number of alleles in samples">
##INFO=<ID=nhomalt,Number=A,Type=Integer,Description="Count of homozygous individuals in samples">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Depth of informative coverage for each sample; reads with MQ=255 or with bad mates are filtered">
##INFO=<ID=lcr,Number=0,Type=Flag,Description="Variant falls within a low complexity region">
##INFO=<ID=AC_afr,Number=A,Type=Integer,Description="Alternate allele count for samples of African-American ancestry">
##INFO=<ID=AN_afr,Number=A,Type=Integer,Description="Total number of alleles in samples of African-American ancestry">
##INFO=<ID=AF_afr,Number=A,Type=Float,Description="Alternate allele frequency in samples of African-American ancestry">
##INFO=<ID=nhomalt_afr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of African-American ancestry">
##INFO=<ID=AC_amr,Number=A,Type=Integer,Description="Alternate allele count for samples of Latino ancestry">
##INFO=<ID=AN_amr,Number=A,Type=Integer,Description="Total number of alleles in samples of Latino ancestry">
##INFO=<ID=nhomalt_amr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Latino ancestry">
##INFO=<ID=AC_eas,Number=A,Type=Integer,Description="Alternate allele count for samples of East Asian ancestry">
##INFO=<ID=AN_eas,Number=A,Type=Integer,Description="Total number of alleles in samples of East Asian ancestry">
##INFO=<ID=nhomalt_eas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of East Asian ancestry">
##INFO=<ID=AC_female,Number=A,Type=Integer,Description="Alternate allele count for female samples">
##INFO=<ID=AN_female,Number=A,Type=Integer,Description="Total number of alleles in female samples">
##INFO=<ID=nhomalt_female,Number=A,Type=Integer,Description="Count of homozygous individuals in female samples">
##INFO=<ID=AC_nfe,Number=A,Type=Integer,Description="Alternate allele count for samples of non-Finnish European ancestry">
##INFO=<ID=AN_nfe,Number=A,Type=Integer,Description="Total number of alleles in samples of non-Finnish European ancestry">
##INFO=<ID=nhomalt_nfe,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of non-Finnish European ancestry">
##INFO=<ID=AC_fin,Number=A,Type=Integer,Description="Alternate allele count for samples of Finnish ancestry">
##INFO=<ID=AN_fin,Number=A,Type=Integer,Description="Total number of alleles in samples of Finnish ancestry">
##INFO=<ID=nhomalt_fin,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Finnish ancestry">
##INFO=<ID=AC_asj,Number=A,Type=Integer,Description="Alternate allele count for samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AN_asj,Number=A,Type=Integer,Description="Total number of alleles in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=nhomalt_asj,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AC_oth,Number=A,Type=Integer,Description="Alternate allele count for samples of uncertain ancestry">
##INFO=<ID=AN_oth,Number=A,Type=Integer,Description="Total number of alleles in samples of uncertain ancestry">
##INFO=<ID=nhomalt_oth,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of uncertain ancestry">
##INFO=<ID=AC_male,Number=A,Type=Integer,Description="Alternate allele count for male samples">
##INFO=<ID=AN_male,Number=A,Type=Integer,Description="Total number of alleles in male samples">
##INFO=<ID=nhomalt_male,Number=A,Type=Integer,Description="Count of homozygous individuals in male samples">
##INFO=<ID=controls_AC,Number=A,Type=Integer,Description="Alternate allele count for samples in the controls subset">
##INFO=<ID=controls_AN,Number=A,Type=Integer,Description="Total number of alleles in samples in the controls subset">

We also extract the following extra fields from gnomAD exome VCF file:

##INFO=<ID=AC_sas,Number=A,Type=Integer,Description="Alternate allele count for samples of South Asian ancestry">
##INFO=<ID=AN_sas,Number=A,Type=Integer,Description="Total number of alleles in samples of South Asian ancestry">
##INFO=<ID=nhomalt_sas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of South Asian ancestry">

Computation

Using these, we compute the following:

  • Coverage
  • Allele count, Homozygous count, allele number and allele frequencies for:
    • Global population
    • African/African Americans
    • Admixed Americans
    • Ashkenazi Jews
    • East Asians
    • Finnish
    • Non-Finnish Europeans
    • South Asian
    • Others (population not assigned)
    • Male
    • Female
    • Controls
Note
  • Coverage = DP / AN. Frequencies are computed using AC/AN for each population.
  • Please note that there is currently no genome sequencing data for the South Asian (SAS) population available in gnomAD.
  • Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.

Merging genomes and exomes

When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets.

info
  • For GRCh37, Nirvana currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output.
  • For GRCh38, Nirvana currently uses gnomAD version 3.0 which doesn't contain the exomes data. Therefore, only genomes data are presented in the output.

Filters

The following strategy will be used when there's a conflict in filter status:

Genomes PASSGenomes Filtered
Exomes PASSPASSOnly use exome data
Exomes FilteredOnly use genome dataFiltered

VCF download instructions

https://gnomad.broadinstitute.org/downloads

JSON output

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.

LoF Gene Metrics

Tab delimited file example

gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_z mis_z lof_z oe_lof_upper_rank oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfe p_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof exac_exp_lof exac_oe_lof brain_expression chromosome start_position end_position
MED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643

JSON key to TSV column mapping

JSON keyTSV columnDescription
pLipLIprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullpNullprobability of being completely tolerant of loss of function variation (observed = expected)
pRecpRecprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZsyn_zcorrected synonymous Z score
misZmis_zcorrected missense Z score
loeufoe_lof_upperloss of function observed/expected upper bound fraction (LOEUF)

Gene symbol update

The input file provides Ensembl gene IDs for each entry. We observed that they were unique while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene IDs are more stable, and the Nirvana transcript cache data contains Ensembl gene IDs, we use these IDs to extract the gene symbols from the transcript cache. For example, if ENSG0001 has gene symbol GENE1 in the input but the Nirvana cache says ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry.

Conflict resolution

gnomAD uses Ensembl gene IDs as unique identifiers in the source file, but Nirvana uses HGNC gene symbols. Multiple Ensembl gene IDs can map to the same HGNC symbol and may therefore result in conflicts.

MDGA2   ENST00000426342 306 4.0043e+02  7.6419e-01  2.1096e-05  4724    78  1.6525e+02  4.7202e-01  1923    125 1.3737e+02  9.0993e-01  7.1973e-06  1413    4   2.0926e-06  453 3.8316e+01  9.9922e-01  8.6490e-12  7.8128e-04  1.0440e-01  7.8600e-01  1.0560e+00  6.9500e-01  8.4000e-01  5.0000e-02  2.3900e-01      8.2988e-01  1.6769e+00  5.1372e+00  1529    0   0   7   2.8103e-05  4.0317e-06  124784  7   0   124791  2.8047e-05  9.8167e-05  0.0000e+00  2.8962e-05  0.0000e+00  0.0000e+00  0.0000e+00  3.5391e-05  1.6672e-04  3.2680e-05  0.0000e+00  2.8962e-05  0.0000e+00  0.0000e+00  0.0000e+00  3.5308e-05  1.6492e-04  3.2678e-05  protein_coding  ENSG00000139915 2   2181    13  protein_coding  835332  9.9322e-01  3   2.7833e+01  1.0779e-01  NA  14  47308826    48144157
MDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 47311134 48143999

In such cases, Nirvana chooses the entry with the smallest "LOEUF" value. The reason for choosing this value can be highlighted by the following table:

LOEUF decileHaplo-insufficientAutosomal DominantAutosomal RecessiveOlfactory Genes
0-10%104140360
10-20%47128721
20-30%17861120
30-40%8801734
40-50%7652068
50-60%4542076
60-70%04615418
70-80%24912049
80-90%0345896
90-100%02640174
Note

List of genes with conflicting entries

MDGA2:
{"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}
{"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}
CRYBG3:
{"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}
{"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}
CHTF8:
{"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}
{"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}
SEPT1:
{"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}
{"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}
ARL14EPL:
{"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}
{"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}
UGT2A1:
{"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}
{"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}
LTB4R2:
{"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}
{"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}
CDRT1:
{"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}
{"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}
MUC3A:
{"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}
{"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}
COG8:
{"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}
{"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}
AC006486.1:
{"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}
{"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}
AL645922.1:
{"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}
{"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}
NBPF20:
{"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}
{"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}
PRAMEF11:
{"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}
{"synZ":-3.33e0,"misZ":-2.59e0}
FAM231D:
{"synZ":-1.98e0,"misZ":-1.44e0}
{"synZ":1.07e0,"misZ":3.13e-1}

Conflict resolution

  • Pick the entry with the lowest LOEUF score
  • If the same, pick the lowest pLI
  • Otherwise pick the entry with the max absolute value of synZ + misZ

Download URL

https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz

JSON output

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)
- - - - \ No newline at end of file diff --git a/3.17/data-sources/mito-heteroplasmy/index.html b/3.17/data-sources/mito-heteroplasmy/index.html deleted file mode 100644 index b777be8c..00000000 --- a/3.17/data-sources/mito-heteroplasmy/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Mitochondrial Heteroplasmy | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Mitochondrial Heteroplasmy

Overview

Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline.

JSON File

Example

{
"T:C":{
"ad":[
1,
1,
1,
1,
1,
1
],
"allele_type":"alt",
"vrf":[
0.002369668246445498,
0.0024937655860349127,
0.0016129032258064516,
0.0025188916876574307,
0.0022935779816513763,
0.002008032128514056
],
"vrf_stats":{
"kurtosis":38.889891511122556,
"max":0.0025188916876574307,
"mean":5.4052190471990743e-05,
"min":0.0,
"nobs":246,
"skewness":6.346664692283075,
"stdev":0.0003461416264750575,
"variance":1.1981402557879823e-07
}
}
}

Parsing

From the JSON file, we're mainly interested in the following keys:

  • variant (i.e. T:C)
  • ad
  • vrf
  • nobs (number of observations)
Adjusting for null observations

The nobs value indicates how many observations were made. Ideally this would have been represented in the ad and vrf arrays, but it's left as an exercise for the reader.

Binning VRF Data

The vrf (variant read frequency) array in the JSON object above is paired with the ad array (allele depths) shown above.

The data in the JSON object has a very large number of significant digits. This means that as the number of samples increases, this array will grow. To make this more future-proof, Nirvana bins everything according to 0.1% increments.

With the binned data, we end up having 775 distinct vrf values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143.

Pre-processing the Data

The JSON file is converted into a small TSV file that is embedded in Nirvana. Here is an example of the TSV file:

#CHROM  POS REF ALT VRF_BINS    VRF_COUNTS
chrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736
chrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736

Algorithm

Nirvana will calculate mitochondrial heteroplasmy data for every sample in the VCF. Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile.

Percentiles

Nirvana uses the statistical definition of percentile (indicating the value below which a given percentage of observations in a group of observations falls). Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1).

Download URL

Unavailable

The original data set is only available internally at Illumina at the moment.

JSON Output

"samples":[
{
"genotype":"0/1",
"variantFrequencies":[
0.333,
0.5
],
"alleleDepths":[
10,
20,
30
],
"heteroplasmyPercentile":[
23.13,
12.65
]
}
]
FieldTypeNotes
heteroplasmyPercentilefloat arrayone percentile for each variant frequency (each alternate allele)
- - - - \ No newline at end of file diff --git a/3.17/data-sources/mitomap-small-variants-json/index.html b/3.17/data-sources/mitomap-small-variants-json/index.html deleted file mode 100644 index c884e801..00000000 --- a/3.17/data-sources/mitomap-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -mitomap-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

mitomap-small-variants-json

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele
- - - - \ No newline at end of file diff --git a/3.17/data-sources/mitomap-structural-variants-json/index.html b/3.17/data-sources/mitomap-structural-variants-json/index.html deleted file mode 100644 index 9228c354..00000000 --- a/3.17/data-sources/mitomap-structural-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -mitomap-structural-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

mitomap-structural-variants-json

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring array
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
- - - - \ No newline at end of file diff --git a/3.17/data-sources/mitomap/index.html b/3.17/data-sources/mitomap/index.html deleted file mode 100644 index fa70353f..00000000 --- a/3.17/data-sources/mitomap/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -MITOMAP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

MITOMAP

Overview

MITOMAP provides a compendium of polymorphisms and mutations in human mitochondrial DNA.

Publication

Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. Current Protocols in Bioinformatics 1(123):1.23.1-26 (2013). http://www.mitomap.org

Scraping HTML Pages

Example

MITOMAP is unique in that it doesn't offer the data in a downloadable format. As a result, the annotation content in Nirvana is scraped from the following MITOMAP pages:

  1. mtDNA Control Region Sequence Variants
  2. mtDNA Coding Region & RNA Sequence Variants
  3. Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations
  4. Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations
  5. Reported mtDNA Deletions
  6. mtDNA Simple Insertions

Parsing

Here's what the HTML code looks like:

["582","<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>","Mitochondrial myopathy","T582C","tRNA Phe","-","+","Reported","<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=582&alt=C&quart=2'><u>72.90%</u></a> <i class='fa fa-arrow-up' style='color:orange' aria-hidden='true'></i></span>","0","<a href='/cgi-bin/print_ref_list?refs=90165,91590&title=RNA+Mutation+T582C' target='_blank'>2</a>"],
["583","<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>","MELAS / MM & EXIT","G583A","tRNA Phe","-","+","Cfrm","<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=583&alt=A&quart=0'><u>93.10%</u></a> <i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i></span>","0","<a href='/cgi-bin/print_ref_list?refs=2066,90532,91590&title=RNA+Mutation+G583A' target='_blank'>3</a>"],

We're mainly interested in the following columns (numbers indicate the HTML page above):

  • Position1,2,3,4
  • Disease3,4
  • Nucleotide Change1,2
  • Allele3,4
  • Homoplasmy3,4
  • Heteroplasmy3,4
  • Status3,4
  • MitoTIP3,4
  • GB Seqs FL(CR)1,2,3,4
  • Deletion Junction5
  • Insert (nt)6
  • Insert Point (nt)6
  • References/Curated References1,2,3,4
MitoTIP

The MitoTIP information is used to populate the clinicalSignificance and scorePercentile JSON keys. The "frequency alert" entries are skipped since they are not directly relevant to clinical significance.

Left alignment

Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions.

Variant Enumeration

Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are C-C(2-8) and A-AC or ACC. Alternate alleles containing IUPAC ambiguity codes are similarly enumerated.

Inversions

MITOMAP inversions are currently treated as MNVs.

Allele Parsing

The following MITOMAP allele parsing conventions are supported:

  • C123T
  • 16021_16022del
  • 8042del2
  • C9537insC
  • 3902_3908invACCTTGC
  • A-AC or ACC
  • C-C(2-8)
  • 8042delAT

PostgreSQL Dump File

Example

COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;
1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177
2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534

Parsing

From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:

  • id
  • nlmid
Why not use the PostgreSQL file for everything?

Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in.

Known Issues

Duplicated records

Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown.

  • For diseases and PubMed IDs, we take the union of the values in the duplicated records.
  • For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.
Skipped records

Records that represent an alternate notation of the original variant are skipped. Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped.

Download URLs

JSON Output

Small Variants

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele

Structural Variants

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring array
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
- - - - \ No newline at end of file diff --git a/3.17/data-sources/omim-json/index.html b/3.17/data-sources/omim-json/index.html deleted file mode 100644 index c1db8fa0..00000000 --- a/3.17/data-sources/omim-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -omim-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

omim-json

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping
- - - - \ No newline at end of file diff --git a/3.17/data-sources/omim/index.html b/3.17/data-sources/omim/index.html deleted file mode 100644 index 3eb2fd36..00000000 --- a/3.17/data-sources/omim/index.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - -OMIM | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

OMIM

Overview

OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily.

Publications

Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: 30445645.

Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM®), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. PMID: 25428349.

Parse OMIM data

Nirvana uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Nirvana. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to a Nirvana gene symbol are further processed. The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols.

mim2gene.txt

This mim2gene.txt (http://omim.org/static/omim/data/mim2gene.txt) file provides the mapping between MIM numbers and gene symbols. An example of this file is given below:

# MIM Number    MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq)   Entrez Gene ID (NCBI)   Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)
100050 predominantly phenotypes
100070 phenotype 100329167
100100 phenotype
100200 predominantly phenotypes
100300 phenotype
100500 moved/removed
100600 phenotype
100640 gene 216 ALDH1A1 ENSG00000165092
100650 gene/phenotype 217 ALDH2 ENSG00000111275
100660 gene 218 ALDH3A1 ENSG00000108602
100670 gene 219 ALDH1B1 ENSG00000137124
100675 predominantly phenotypes
100678 gene 39 ACAT2 ENSG00000120437

The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns is used to find the proper gene symbol supported by Nirvana, which may or may not be the same as the gene symbol listed here.

OMIM API

Nirvana retrieves the OMIM annotations from the OMIM API JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. A sample JSON response from the API is provided below.

{
"omim": {
"version": "1.0",
"entryList": [
{
"entry": {
"prefix": "*",
"mimNumber": 100640,
"status": "live",
"titles": {
"preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",
"alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\nACETALDEHYDE DEHYDROGENASE 1;;\nALDH, LIVER CYTOSOLIC;;\nRETINAL DEHYDROGENASE 1; RALDH1"
},
"textSectionList": [
{
"textSection": {
"textSectionName": "description",
"textSectionTitle": "Description",
"textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\n\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."
}
}
],
"geneMap": {
"sequenceID": 7709,
"chromosome": 9,
"chromosomeSymbol": "9",
"chromosomeSort": 225,
"chromosomeLocationStart": 72900670,
"chromosomeLocationEnd": 72953052,
"transcript": "ENST00000297785.7",
"cytoLocation": "9q21",
"computedCytoLocation": "9q21.13",
"mimNumber": 100640,
"geneSymbols": "ALDH1A1",
"geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",
"mappingMethod": "REa, A",
"confidence": "P",
"mouseGeneSymbol": "Aldh1a1",
"mouseMgiID": "MGI:1353450",
"geneInheritance": null
},
"externalLinks": {
"geneIDs": "216",
"hgncID": "402",
"ensemblIDs": "ENSG00000165092,ENST00000297785.8",
"approvedGeneSymbols": "ALDH1A1",
"ncbiReferenceSequences": "1519246465",
"proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",
"uniGenes": "Hs.76392",
"swissProtIDs": "P00352",
"decipherGene": false,
"umlsIDs": "C1412333",
"gtr": true,
"cmgGene": false,
"keggPathways": true,
"gwasCatalog": false

}
}
},
{
"entry": {
"prefix": "*",
"mimNumber": 102560,
"status": "live",
"titles": {
"preferredTitle": "ACTIN, GAMMA-1; ACTG1",
"alternativeTitles": "ACTIN, GAMMA; ACTG;;\nCYTOSKELETAL GAMMA-ACTIN;;\nACTIN, CYTOPLASMIC, 2"
},
"textSectionList": [
{
"textSection": {
"textSectionName": "description",
"textSectionTitle": "Description",
"textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."
}
}
],
"geneMap": {
"sequenceID": 13666,
"chromosome": 17,
"chromosomeSymbol": "17",
"chromosomeSort": 947,
"chromosomeLocationStart": 81509970,
"chromosomeLocationEnd": 81512798,
"transcript": "ENST00000331925.7",
"cytoLocation": "17q25.3",
"computedCytoLocation": "17q25.3",
"mimNumber": 102560,
"geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",
"geneName": "Actin, gamma-1",
"mappingMethod": "REa, A, Fd",
"confidence": "C",
"mouseGeneSymbol": "Actg1",
"mouseMgiID": "MGI:87906",
"geneInheritance": null,
"phenotypeMapList": [
{
"phenotypeMap": {
"mimNumber": 102560,
"phenotype": "Baraitser-Winter syndrome 2",
"phenotypeMimNumber": 614583,
"phenotypicSeriesNumber": "PS243310",
"phenotypeMappingKey": 3,
"phenotypeInheritance": "Autosomal dominant"
}
},
{
"phenotypeMap": {
"mimNumber": 102560,
"phenotype": "Deafness, autosomal dominant 20/26",
"phenotypeMimNumber": 604717,
"phenotypicSeriesNumber": "PS124900",
"phenotypeMappingKey": 3,
"phenotypeInheritance": "Autosomal dominant"
}
}
]
}
}
}
]
}
}

Content from the OMIM API JSON response is reorganized as shown in the Nirvana JSON Output

Mappings between the Nirvana JSON output and OMIM JSON API are listed in the table below:

Nirvana JSON key chainOMIM API JSON key chain
omim:mimNumberomim:entryList:entry:mimNumber
omim:geneNameomim:entryList:entry:geneMap:geneName
omim:descriptionomim:entryList:entry:textSectionList:textSection:textSectionContent
omim:phenotypes:mimNumberomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber
omim:phenotypes:phenotypeomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype
omim:phenotypes:descriptionomim:entryList:entry:textSectionList:textSection:textSectionContent
omim:phenotypes:mappingomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (see mapping below)
omim:phenotypes:inheritancesomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance
omim:phenotypes:commentsomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (see mapping below)

Mapping key to content

1 to disorder was positioned by mapping of the wild type gene
-2 to disease phenotype itself was mapped
-3 to molecular basis of the disorder is known
-4 to disorder is a chromosome deletion or duplication syndrome

Phenotype character to comment

? to unconfirmed or possibly spurious mapping
-[/] to nondiseases
-{/} to contribute to susceptibility to multifactorial disorders or to susceptibility to infection

There are different types of links in the OMIM description section. For example, in the above JSON response, we have the description of MIM entry 100640:

The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\n\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).

As the descriptions will be shown as plain text, we remove the curly brackets surrounding links and try to keep the text readable with minimal modifications. Briefly:

  • Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.
  • Links referring to a literature reference will be processed to remove the internal index and curly brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".
  • All the other links will simply have their curly brackets removed. For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".
  • If the content within a pair of parentheses becomes empty after being processed, the parentheses need to be removed as well and its surrounding white spaces should be properly processed. For example, "ALDH2 ({100650})," will become "ALDH2,".

Here is a list of examples of how the description section is supposed to be processed:

Original textProcessed text
({516030}, {516040}, and {516050})
(e.g., D1, {168461}; D2, {123833}; D3, {123834})(e.g., D1; D2; D3)
(desmocollins; see DSC2, {125645})(desmocollins; see DSC2)
(e.g., see {102700}, {300755})
(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})(ADH). See also liver mitochondrial ALDH2
(see, e.g., CACNA1A; {601011})(see, e.g., CACNA1A)
(e.g., GSTA1; {138359}), mu (e.g., {138350})(e.g., GSTA1), mu
(NFKB; see {164011})(NFKB)
(see ISGF3G, {147574})(see ISGF3G)
(DCK; {EC 2.7.1.74}; {125450})(DCK; EC 2.7.1.74)

JSON output

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping

Building the supplementary files

The first step in building the OMIM .nga files is to use the SAUtils command's subcommand downloadOMIM to download the necessary data. In order to download the data, the user must possess an API key obtained from OMIM. This key has to be set as the environment variable OmimApiKey.

export OmimApiKey=<users-omim-api-key>
dotnet NirvanaBuild/SAUtils.dll downloadOMIM
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll downloadomim [options]
Download the OMIM gene annotation data

OPTIONS:
--uga, -u <path> universal gene archive path
--ref, -r <filename> input reference filename
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

dotnet NirvanaBuild/SAUtils.dll downloadOMIM --ref References/7/Homo_sapiens.GRCh38.Nirvana.dat --uga Cache/27/UGA.tsv.gz --out ExternalDataSources/OMIM/2021-06-14
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------

Unable to resolve gene symbol conflict for CD300H: Ensembl: [ENSG00000284690]: AC079325.2, Entrez Gene: [100130520]: LOC100130520
Unable to resolve gene symbol conflict for STRIT1: Ensembl: [ENSG00000240045]: DWORF, Entrez Gene: [100507537]: LOC100507537
Unable to resolve gene symbol conflict for WAKMAR2: Ensembl: [ENSG00000237499]: AL357060.2, Entrez Gene: [100130476]: LOC100130476
Unable to resolve gene symbol conflict for PERCC1: Ensembl: [ENSG00000284395]: AL032819.3, Entrez Gene: [105371045]: LOC105371045
Unable to resolve gene symbol conflict for LASTR: Ensembl: [ENSG00000242147]: AL365356.5, Entrez Gene: [105376382]: LOC105376382
Unable to resolve gene symbol conflict for PRANCR: Ensembl: [ENSG00000257815]: LINC01481, Entrez Gene: [101928062]: LOC101928062
Unable to resolve gene symbol conflict for THORLNC: Ensembl: [ENSG00000226856]: AC093901.1, Entrez Gene: [100506797]: LOC100506797
Gene Symbol Update Statistics
============================================
# of gene symbols already up-to-date: 15,952
# of gene symbols updated: 330
# of genes where both IDs are null: 0
# of gene symbols not in cache: 148
# of resolved gene symbol conflicts: 15
# of unresolved gene symbol conflicts: 7

Time: 00:02:38.2

Once the download has succeeded, the nga files can be produced using the SAUtils command's subcommand omim.

dotnet NirvanaBuild/SAUtils.dll omim
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll omim [options]
Creates a gene annotation database from OMIM data

OPTIONS:
--m2g, -m <VALUE> MimToGeneSymbol tsv file
--json, -j <VALUE> OMIM entry json file
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version


dotnet NirvanaBuild/SAUtils.dll omim --m2g ExternalDataSources/OMIM/2021-06-14/MimToGeneSymbol.tsv --json ExternalDataSources/OMIM/2021-06-14/MimEntries.json.gz --out SupplementaryDatabase/63/
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------


Time: 00:00:04.5
- - - - \ No newline at end of file diff --git a/3.17/data-sources/phylop-json/index.html b/3.17/data-sources/phylop-json/index.html deleted file mode 100644 index b03d553e..00000000 --- a/3.17/data-sources/phylop-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -phylop-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

phylop-json

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"phylopScore":0.459
}
]
FieldTypeNotes
phylopScorefloatrange: -14.08 to 6.424
- - - - \ No newline at end of file diff --git a/3.17/data-sources/phylop/index.html b/3.17/data-sources/phylop/index.html deleted file mode 100644 index 94518b6a..00000000 --- a/3.17/data-sources/phylop/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -PhyloP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

PhyloP

Overview

PhyloP (phylogenetic p-values) conservation scores are obtained from the PHAST package (http://compgen.bscb.cornell.edu/phast/) for multiple alignments of vertebrate genomes to the human genome. For GRCh38, the multiple alignments are against 19 mammals; for GRCh37, they are against 45 vertebrate genomes.

Publication

Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. Genome Res. 2005 Aug;15(8):1034-50. (http://www.genome.org/cgi/doi/10.1101/gr.3715005)

WigFix File

The data is provided in WigFix files, which are text files that provide conservation scores for contiguous intervals in the following format:

fixedStep chrom=chr1 start=10918 step=1
0.064
0.058
0.064
0.058
0.064
0.064
fixedStep chrom=chr1 start=34045 step=1
0.111
0.100
0.111
0.111
0.100
0.111
0.111
0.111
0.100
0.111
-1.636

We convert them to binary files with indexes for fast query. Note that these are scores for genomic positions and are reported only for SNVs.

Download URL

GRCh37: http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/

GRCh38: http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/

JSON Output

Unlike other supplementary datasources, phyloP scores are reported in the variants section.

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"phylopScore":0.459
}
]
FieldTypeNotes
phylopScorefloatrange: -14.08 to 6.424
- - - - \ No newline at end of file diff --git a/3.17/data-sources/primate-ai-json/index.html b/3.17/data-sources/primate-ai-json/index.html deleted file mode 100644 index b79d8c31..00000000 --- a/3.17/data-sources/primate-ai-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -primate-ai-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

primate-ai-json

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.17/data-sources/primate-ai/index.html b/3.17/data-sources/primate-ai/index.html deleted file mode 100644 index 957255f8..00000000 --- a/3.17/data-sources/primate-ai/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Primate AI | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Primate AI

Overview

Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations. The method is described in the publication:

Publication

Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. Nat Genet 50, 1161–1170 (2018). https://doi.org/10.1038/s41588-018-0167-z

TSV File

Example

chr pos ref alt refAA   altAA   strand_1pos_0neg    trinucleotide_context   UCSC_gene   ExAC_coverage   primateDL_score
chr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239
chr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546

Parsing

From the TSV file, we're mainly interested in the following columns:

  • chr
  • pos
  • ref
  • alt
  • primateDL_score

We also use UCSC_gene to filter out variants that don't have matching gene models in Nirvana.

Pre-processing

Converting UCSC IDs

Primate AI only provides UCSC IDs. As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs.

The following queries are used to download the conversions from UCSC:

mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \
-e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv

mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \
-e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \
hg19 > ucsc_ensembl.tsv

Running the Pre-Processor

The Primate AI pre-processor can be run as follows:

dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \
ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz

During conversion, 0.5% of the UCSC IDs cannot be converted to either Entrez or Ensembl gene IDs. Once the gene IDs have been acquired, we check to see which are available in Nirvana.

The following Entrez Gene IDs were not found:

399753
401980
504189
504191
100293534

Here is the output from the pre-processor:

- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.
- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.
- loading UGA gene ID to gene dictionary... 103,277 genes loaded.
- parsing Primate AI variants... 70,121,953 variants parsed.

# variants with unknown gene ID: 27,253 / 70,121,953
# genes with unknown gene ID: 109 / 19,614

# variants not in UGA: 2,036 / 70,121,953
# genes not in UGA: 6 / 19,614

Known Issues

Known Issues

The Primate AI data set provides raw scores, but the scores are biased according to gene context. I.e. a 0.4 means something different in TP53 than it does in KRAS.

As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. According to their research, the 25th percentile is a good proxy for benign variants and the 75th percentile is a good proxy for pathogenic variants.

Download URL

https://basespace.illumina.com/s/cPgCSmecvhb4

JSON Output

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3,
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.17/data-sources/revel-json/index.html b/3.17/data-sources/revel-json/index.html deleted file mode 100644 index a31ce859..00000000 --- a/3.17/data-sources/revel-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -revel-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

revel-json

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.17/data-sources/revel/index.html b/3.17/data-sources/revel/index.html deleted file mode 100644 index c29b61e5..00000000 --- a/3.17/data-sources/revel/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -REVEL | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

REVEL

Overview

REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons.

Publication

Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. The American Journal of Human Genetics 99, 877-885 (2016). https://doi.org/10.1016/j.ajhg.2016.08.016

CSV File

Example

chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL
1,35142,35142,G,A,T,M,0.027
1,35142,35142,G,C,T,R,0.035
1,35142,35142,G,T,T,K,0.043
1,35143,35143,T,A,T,S,0.018
1,35143,35143,T,C,T,A,0.034

Parsing

From the CSV file, we're mainly interested in the following columns:

  • chr
  • hg19_pos
  • grch38_pos
  • ref
  • alt
  • REVEL

Known Issues

Sorting

Since the input file contains positions for both GRCh37 and GRCh38, we split it into two TSV files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file.

Conflicting Scores

When there are multiple scores available for the same variant (i.e. the same position with the same alternative allele), we pick the highest score.

Download URL

https://sites.google.com/site/revelgenomics/downloads

JSON Output

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.17/data-sources/splice-ai-json/index.html b/3.17/data-sources/splice-ai-json/index.html deleted file mode 100644 index 41185054..00000000 --- a/3.17/data-sources/splice-ai-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -splice-ai-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

splice-ai-json

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place
- - - - \ No newline at end of file diff --git a/3.17/data-sources/splice-ai/index.html b/3.17/data-sources/splice-ai/index.html deleted file mode 100644 index 4b9e9ae2..00000000 --- a/3.17/data-sources/splice-ai/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Splice AI | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Splice AI

Overview

SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence.

Publication

K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. Cell, 176 (3) (2019), pp. 535-548 e24

VCF File

Example

##fileformat=VCFv4.0
##assembly=GRCh37/hg19
##INFO=<ID=SYMBOL,Number=1,Type=String,Description="HGNC gene symbol">
##INFO=<ID=STRAND,Number=1,Type=String,Description="+ or - depending on whether the gene lies in the positive or negative strand">
##INFO=<ID=TYPE,Number=1,Type=String,Description="E or I depending on whether the variant position is exonic or intronic (GENCODE V24lift37 canonical annotation)">
##INFO=<ID=DIST,Number=1,Type=Integer,Description="Distance between the variant position and the closest splice site (GENCODE V24lift37 canonical annotation)">
##INFO=<ID=DS_AG,Number=1,Type=Float,Description="Delta score (acceptor gain)">
##INFO=<ID=DS_AL,Number=1,Type=Float,Description="Delta score (acceptor loss)">
##INFO=<ID=DS_DG,Number=1,Type=Float,Description="Delta score (donor gain)">
##INFO=<ID=DS_DL,Number=1,Type=Float,Description="Delta score (donor loss)">
##INFO=<ID=DP_AG,Number=1,Type=Integer,Description="Delta position (acceptor gain) relative to the variant position">
##INFO=<ID=DP_AL,Number=1,Type=Integer,Description="Delta position (acceptor loss) relative to the variant position">
##INFO=<ID=DP_DG,Number=1,Type=Integer,Description="Delta position (donor gain) relative to the variant position">
##INFO=<ID=DP_DL,Number=1,Type=Integer,Description="Delta position (donor loss) relative to the variant position">
#CHROM POS ID REF ALT QUAL FILTER INFO
10 92946 . C T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35
10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1
10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21
10 92947 . A C . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34
10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34
10 92947 . A G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32

Parsing

From the VCF file, we're mainly interested in the following columns:

  • DS_AG - Δ score (acceptor gain)
  • DS_AL - Δ score (acceptor loss)
  • DS_DG - Δ score (donor gain)
  • DS_DL - Δ score (donor loss)
  • DP_AG - Δ position (acceptor gain) relative to the variant position
  • DP_AL - Δ position (acceptor loss) relative to the variant position
  • DP_DG - Δ position (donor gain) relative to the variant position
  • DP_DL - Δ position (donor loss) relative to the variant position

The Splice AI team suggests the following interpretation for the scores:

RangeConfidencePathogenicity
0 ≤ x < 0.1lowlikely benign
0.1 ≤ x ≤ 0.5mediumlikely pathogenic
x > 0.5highpathogenic

Pre-processing

Filtering

Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value, e.g. low splice scores observed in intergenic regions. Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed.

As a result, Nirvana filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism.

Download URL

https://basespace.illumina.com/s/5u6ThOblecrh

JSON Output

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place
- - - - \ No newline at end of file diff --git a/3.17/data-sources/topmed-json/index.html b/3.17/data-sources/topmed-json/index.html deleted file mode 100644 index 74e4e51e..00000000 --- a/3.17/data-sources/topmed-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -topmed-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

topmed-json

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.17/data-sources/topmed/index.html b/3.17/data-sources/topmed/index.html deleted file mode 100644 index 8f527ea4..00000000 --- a/3.17/data-sources/topmed/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -TOPMed | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

TOPMed

Overview

The Trans-Omics for Precision Medicine (TOPMed) program, sponsored by the National Institutes of Health (NIH) National Heart, Lung and Blood Institute (NHLBI), is part of a broader Precision Medicine Initiative, which aims to provide disease treatments tailored to an individual’s unique genes and environment. TOPMed contributes to this Initiative through the integration of whole-genome sequencing (WGS) and other omics (e.g., metabolic profiles, epigenomics, protein and RNA expression patterns) data with molecular, behavioral, imaging, environmental, and clinical data.

Publication

Kowalski, M.H., Qian, H., Hou, Z., Rosen, J.D., Tapia, A.L., Shan, Y., Jain, D., Argos, M., Arnett, D.K., Avery, C. and Barnes, K.C., 2019. Use of >100,000 NHLBI Trans-Omics for Precision Medicine (TOPMed) Consortium whole genome sequences improves imputation quality and detection of rare variant associations in admixed African and Hispanic/Latino populations. PLoS genetics, 15(12), p.e1008500.

VCF extraction

We currently extract the following fields from the TOPMed VCF file:

##INFO=<ID=AN,Number=1,Type=Integer,Description="Number of Alleles in Samples with Coverage">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts in Samples with Coverage">
##INFO=<ID=AF,Number=A,Type=Float,Description="Alternate Allele Frequencies">
##INFO=<ID=Het,Number=A,Type=Integer,Description="Number of samples with heterozygous genotype calls">
##INFO=<ID=Hom,Number=A,Type=Integer,Description="Number of samples with homozygous alternate genotype calls">

Example:

chr1    10132   TOPMed_freeze_5?chr1:10,132     T       C       255     SVM     VRT=1;NS=62784;AN=125568;AC=32;AF=0.000254842;Het=32;Hom=0      NA:FRQ  125568:0.000254842

GRCh37 liftover

The data is not available for GRCh37 on the TOPMed website. We performed a liftover from GRCh38 to GRCh37 using dbSNP IDs.

Download URL

https://bravo.sph.umich.edu/freeze5/hg38/download

JSON output

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.17/file-formats/custom-annotations/index.html b/3.17/file-formats/custom-annotations/index.html deleted file mode 100644 index 925e4671..00000000 --- a/3.17/file-formats/custom-annotations/index.html +++ /dev/null @@ -1,42 +0,0 @@ - - - - - - - -Custom Annotations | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Custom Annotations

Overview

While the team tries to keep data sources up-to-date, you might want to start incorporating new annotations ahead of our update cycle. Another -common use case involves protected health information (PHI). Custom annotations are a mechanism that enables both use cases.

Here are some examples of how our collaborators use custom annotations:

  • associating context from both a patient-level and a patient cohort level with the variant annotations
  • adding content that is licensed (e.g. HGMD) to the variant annotations

At the moment, we have two different custom annotation file formats. One provides additional annotations to variants (both small variants and SVs) -while the other caters to gene annotations.

In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data.

The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how -Nirvana should match the variants.

At Illumina, there are usually many components downstream of Nirvana that have to parse our annotations. If a customer provides a custom -annotation, those downstream tools need to understand more about the data such as:

  • data type (e.g. number, boolean, or a string)
  • data category (e.g. is this an allele count, allele number, allele frequency, etc.)
  • associated population (i.e. if this is an allele frequency)

For each custom annotation, Nirvana uses this context to create a JSON schema that can be sent to downstream tools. If -a tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of [0, 1].

Variant File Format

Basic Allele Frequency Example

Create the Custom Annotation TSV

Imagine that you want to create a basic allele frequency custom annotation for small variants. If we visualized the tab-delimited file -(TSV), it would look something like this:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTallAf
#categories...AlleleFrequency
#descriptions...ALL
#type...number
chr1623603511TGAT0.000006579
chr1668801894GA0.000006569
chr1911107436GA0.00003291

Here's the full TSV file.

Let's go over the header and discuss the contents:

  • title indicates the name of the JSON key
  • assembly indicates that this data is only valid for GRCh38
  • matchVariantsBy indicates that we should only match the annotations if they are allele-specific
  • categories provides hints to downstream tools on how they might want to treat the data. In this case, we indicate that it's an allele -frequency.
  • descriptions are used in special circumstances to provide more context. Even though column 5 is called allAf, it might not be clear to a -downstream tool that this means a global allele frequency using all sub-populations. In this case, ALL indicates the intended population.
  • type indicates to downstream tools the data type. Since allele frequencies are numbers, we'll write number in this column.
Reference Base Checking

Nirvana validates all the reference bases in a custom annotation. If a variant or genomic region is specified that has the wrong reference base, an error will be produced.

Sorting

The variants within each chromosome must be sorted by genomic position.

Convert to Nirvana Format

First we need to convert the TSV file to Nirvana's native file format and let's put that file in a new directory called CA:

$ mkdir CA
$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \
-r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA
---------------------------------------------------------------------------
SAUtils (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Chromosome 16 completed in 00:00:00.1
Chromosome 19 completed in 00:00:00.0

Time: 00:00:00.2

Annotate with Nirvana

Let's annotate the following VCF (notice that it's one of the variants that we have in our custom annotation):

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 68801894 . G A . . .

Here's the full VCF file.

Since Nirvana can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to -the normal Nirvana command-line.

$ dotnet bin/Release/netcoreapp2.1/Nirvana.dll -c Data/Cache/GRCh38/Both \
-r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \
--sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA
---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:01.8
SA Position Scan 00:00:00.0 19

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
chr16 00:00:00.2 00:00:01.3 1

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:01.9 25.5 %
Preload 00:00:00.2 3.3 %
Annotation 00:00:01.3 18.2 %

Time: 00:00:06.3

Investigate the Results

We would expect the following data to show up in our JSON output file:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": {
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06
},
"clinvar": [

Here's the full JSON file.

Nirvana preserves up to 6 decimal places for allele frequency data.

Categories & Descriptions Example

Create the Custom Annotation TSV

Building on the previous example, we can add other types of annotations like predictions and general notes.

Col 1Col 2Col 3Col 4Col 5Col 6Col 7
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTallAfpathogenicitynotes
#categories...AlleleFrequencyPrediction.
#descriptions...ALL..
#type...numberstringstring
chr1623603511TGAT0.000006579P.
chr1668801894GA0.000006569LPSeen in case 123
chr1911107436GA0.00003291..

Here's the full TSV file.

Placeholders

You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). While -Nirvana also accepts empty columns in the TSV file, we use periods in these examples to promote readability.

Let's go over what's new in this example:

  • Column 6 adds a field called pathogenicity which uses the Prediction category. When using this category, Nirvana will -validate to make -sure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic).
  • Column 7 adds a field called notes and it doesn't have a category or description. We're just going to use it to add some internal -notes.

Annotate with Nirvana

Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the -alternate allele (allele-specific match):

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 23603511 . TG T . . .
16 68801894 . G A . . .
19 11107436 . G C . . .

Here's the full VCF file.

Investigate the Results

Because we specified #matchVariantsBy=allele in our custom annotation file, only the middle variant will get an annotation:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": {
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06,
"pathogenicity": "LP",
"notes": "Seen in case 123"
},
"clinvar": [

Here's the full JSON file.

Using Positional Matches

What would happen if we changed to #matchVariantsBy=position? Two things will happen. First, our positional variants will now match:

      "variants": [
{
"vid": "16-23603511-TG-T",
"chromosome": "16",
"begin": 23603512,
"end": 23603512,
"refAllele": "G",
"altAllele": "-",
"variantType": "deletion",
"hgvsg": "NC_000016.10:g.23603512delG",
"MyDataSource": [
{
"refAllele": "GA",
"altAllele": "-",
"allAf": 7e-06,
"pathogenicity": "P"
}
],
"clinvar": [

In addition, you will now see an extra flag for our allele-specific variant:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": [
{
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06,
"pathogenicity": "LP",
"notes": "Seen in case 123",
"isAlleleSpecific": true
}
],
"clinvar": [

Genomic Region Example

Create the Custom Annotation TSV

In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. Consider the following example:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFENDnotes
#categories....
#descriptions....
#type...string
chr1620000000T70000000Lots of false positives in this region

Here's the full TSV file.

Let's go over what's new in this example:

  • Column 5 now has a field called notes. In essence, it looks exactly like column 7 from our previous example.
  • The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.

In the previous example we learned about positional matching vs allele-specific matching. For genomic regions, #matchVariantsBy=allele and #matchVariantsBy=position produce -the same result.

Annotate with Nirvana

Let's use the same VCF file as our previous example.

Investigate the Results

    {
"chromosome": "16",
"position": 23603511,
"refAllele": "TG",
"altAlleles": [
"T"
],
"cytogeneticBand": "16p12.2",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0,
"annotationOverlap": 0
}
],
"variants": [

Here's the full JSON file.

Reciprocal & Annotation Overlap

For all intervals, Nirvana internally calculates two overlaps: a variant overlap and an annotation overlap. Variant overlap is the percentage of the variant's length that is -overlapped. Annotation overlap is the percentage of the annotation's length that is overlapped.

Reciprocal overlap is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is 1 bp, both the reciprocal overlap and the annotation overlap will be pretty close to 0.

We will also see this annotation for the other variant on chr16:

    {
"chromosome": "16",
"position": 68801894,
"refAllele": "G",
"altAlleles": [
"A"
],
"cytogeneticBand": "16q22.1",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0,
"annotationOverlap": 0
}
],
"variants": [
Targeting Structural Variants

Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To -force Nirvana to match regions only to other SVs, use the #matchVariantsBy=sv option in the header.

Mixing Small Variants and Genomic Regions

Create the Custom Annotation TSV

Previously we looked at examples that either had small variants or genomic regions. Let's create a file that contains both:

Col 1Col 2Col 3Col 4Col 5Col 6
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTENDnotes
#categories.....
#descriptions.....
#type....string
chr1623603511TGAT..
chr1668801894GA..
chr1911107436GA..
chr2110510818C.10699435Interval #1
chr2110510818C<DEL>10699435Interval #2
chr2212370388TT[chr22:12370729[.Known false-positive

Here's the full TSV file.

Let's go over what's new in this example:

  • Column 4 now has the ALT field. Except for the case listed below, this is only used by small variants or translocation breakends.
  • Column 5 now has the END field. This is only used by genomic regions.
  • There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? Interval #2 has a symbolic allele in the ALT column. When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). When Nirvana matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.

Annotate with Nirvana

Let's use a new VCF file to study how matching works for intervals #1 and #2:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
21 10510818 . C <DUP> . . END=10699435;SVTYPE=DUP
22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND

Here's the full VCF file.

The first variant is similar to the custom annotation labelled "interval #2". Position 10510818 is the padding base, so it effectively starts at position 10510819.

Investigate the Results

  "positions": [
{
"chromosome": "21",
"position": 10510818,
"svEnd": 10699435,
"refAllele": "C",
"altAlleles": [
"<DUP>"
],
"cytogeneticBand": "21p11.2",
"MyDataSource": [
{
"start": 10510818,
"end": 10699435,
"notes": "Interval #1",
"reciprocalOverlap": 0.99999,
"annotationOverlap": 0.99999
},
{
"start": 10510819,
"end": 10699435,
"notes": "Interval #2",
"reciprocalOverlap": 1,
"annotationOverlap": 1
}
],

Here's the full JSON file.

As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. Interval #1 technically starts 1 bp earlier, so its overlap is 99.999%.

Further down the JSON file, we find the annotated translocation breakend:

      "variants": [
{
"vid": "22-12370388-T-T[chr22:12370729[",
"chromosome": "22",
"begin": 12370388,
"end": 12370388,
"isStructuralVariant": true,
"refAllele": "T",
"altAllele": "T[chr22:12370729[",
"variantType": "translocation_breakend",
"MyDataSource": {
"refAllele": "T",
"altAllele": "T[chr22:12370729[",
"notes": "Known false-positive"
}
}

Gene File Format

Basic Gene Example

Create the Custom Annotation TSV

Previously we looked at examples that either had small variants or genomic regions. However, sometimes we would like to add custom gene annotations. The gene custom annotation file format -looks slightly different:

Col 1Col 2Col 3Col 4
#title=MyDataSource
#geneSymbolgeneIdphenotypenotes
#categories...
#descriptions...
#type.stringstring
TP537157Colorectal cancer, hereditary nonpolyposis, type 5.
KRASENSG00000133703Mismatch repair cancer syndromeSeen in cohort 123

Here's the full TSV file.

Let's go over what's in this example:

  • Column 2 has the geneId field. This can be either an Entrez Gene ID or an Ensembl ID.
Gene Symbols

Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Nirvana uses the geneId to match genes rather than the gene symbol. However, to -make the custom annotation files easier to read, we've included the geneSymbol column as well.

Unknown Gene IDs

When Nirvana parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Nirvana. In such a case, Nirvana will display an error showing all the -unrecognized gene IDs.

Annotate with Nirvana

Let's use a VCF file that contains variants in TP53 and KRAS:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
12 25227255 . A T . . .
17 7675074 . C A . . .

Here's the full VCF file.

Investigate the Results

  "genes": [
{
"name": "KRAS",
"clingenGeneValidity": [
{
"diseaseId": "MONDO_0009026",
"disease": "Costello syndrome",
"classification": "disputed",
"classificationDate": "2018-07-24"
}
],
"clingenDosageSensitivityMap": {
"haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"
},
"gnomAD": {
"pLi": 0.000788,
"pRec": 0.789,
"pNull": 0.21,
"synZ": 0.336,
"misZ": 2.32,
"loeuf": 1.24
},
"MyDataSource": {
"phenotype": "Mismatch repair cancer syndrome",
"notes": "Seen in cohort 123"
}
},

This is the abbreviated output for KRAS. Here's the full JSON file if you want to see the complete KRAS entry.

Customizing the Header

Title

For the title, you can provide any string that hasn't already been used. The title should be unique.

caution

Make sure that the title does not conflict with other keys in the JSON file.

For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be -vid, chromosome, transcripts, etc. The title should also not conflict with other data source keys like clinvar or gnomad.

For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be chromosome, svLength, cytogeneticBand, etc. The title should also not conflict with other data source keys like clingen or dgv.

caution

Care should be taken not to annotate using multiple custom annotations that all use the same title.

Genome Assemblies

The following genome assemblies can be specified:

  • GRCh37
  • GRCh38

Matching Criteria

The matching criteria instruct Nirvana how to match a VCF variant to the custom annotation.

The following matching criteria can be specified:

  • allele - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like gnomAD
  • position - use this when you want positional matches. This is commonly used with disease phenotype data sources like ClinVar
  • sv - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline copy number intervals along the genome.

Categories

Categories are not used by Nirvana, but are often used by downstream tools. Categories provide hints for how those tools should filter or display the annotation data.

When a category is specified, Nirvana will provide additional validation for those fields. The following table describes each category:

CategoryDescriptionValidation
AlleleCountallele counts for a specific populationSee the supported populations below
AlleleNumberallele numbers for a specific populationSee the supported populations below
AlleleFrequencyallele frequencies for a specific populationSee the supported populations below
PredictionACMG-style pathogenicity classificationsbenign (B)
likely benign (LB)
VUS
likely pathogenic (LP)
pathogenic (P)
Filterfree text that signals downstream tools to add the column to the filterMax 20 characters
Descriptionfree-text descriptionMax 100 characters
Identifierany IDMax 50 characters
HomozygousCountcount of homozygous individuals for a specific populationSee the supported populations below
Scoreany score valueAny double-precision floating point number

Descriptions

Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations.

Populations

The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD.

Population CodeSuper-population CodeDescription
ACBAFRAfrican Caribbeans in Barbados
AFRAFRAfrican
ALLALLAll populations
AMRAMRAd Mixed American
ASJAshkenazi Jewish
ASWAFRAmericans of African Ancestry in SW USA
BEBSASBengali from Bangladesh
CDXEASChinese Dai in Xishuangbanna, China
CEUEURUtah Residents (CEPH) with Northern and Western European Ancestry
CHBEASHan Chinese in Beijing, China
CHSEASSouthern Han Chinese
CLMAMRColombians from Medellin, Colombia
EASEASEast Asian
ESNAFREsan in Nigeria
EUREUREuropean
FINEURFinnish in Finland
GBREURBritish in England and Scotland
GIHSASGujarati Indian from Houston, Texas
GWDAFRGambian in Western Divisions in the Gambia
IBSEURIberian population in Spain
ITUSASIndian Telugu from the UK
JPTEASJapanese in Tokyo, Japan
KHVEASKinh in Ho Chi Minh City, Vietnam
LWKAFRLuhya in Webuye, Kenya
MAGAFRMandinka in the Gambia
MKKAFRMaasai in Kinyawa, Kenya
MSLAFRMende in Sierra Leone
MXLAMRMexican Ancestry from Los Angeles, USA
NFEEUREuropean (Non-Finnish)
OTHOTHOther
PELAMRPeruvians from Lima, Peru
PJLSASPunjabi from Lahore, Pakistan
PURAMRPuerto Ricans from Puerto Rico
SASSASSouth Asian
STUSASSri Lankan Tamil from the UK
TSIEURToscani in Italia
YRIAFRYoruba in Ibadan, Nigeria

Data Types

Each custom annotation can be one of the following data types:

  • bool - true or false
  • number - any integer or floating-point number
  • string - text
tip

For boolean variables, only keys with a true value will be output to the JSON object.

Using SAUtils

Nirvana includes a tool called SAUtils that converts various data sources into Nirvana's native binary format. The sub-commands customvar and customgene are used to specify a variant file or a gene file respectively.

Convert Variant File

dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i MyDataSource.tsv \
-o SupplementaryAnnotation
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input TSV path
  • the -o argument specifies the output directory

Convert Gene File

dotnet bin/Release/netcoreapp2.1/SAUtils.dll customgene \
--uga Nirvana_UGA.tsv \
-i MyDataSource.tsv \
-o SupplementaryAnnotation
  • the --uga argument specifies the Nirvana universal gene archive (UGA) path
  • the -i argument specifies the input TSV path
  • the -o argument specifies the output directory
- - - - \ No newline at end of file diff --git a/3.17/file-formats/nirvana-json-file-format/index.html b/3.17/file-formats/nirvana-json-file-format/index.html deleted file mode 100644 index 819d2f48..00000000 --- a/3.17/file-formats/nirvana-json-file-format/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Nirvana JSON File Format | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Nirvana JSON File Format

Overview

Conventions

In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. As such, we have several conventions that are useful to know about:

  • With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display "isStructuralVariant":false a few million times when annotating a small variant VCF.
  • When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. Nirvana treats periods like empty or null strings and therefore will not output those entries.

JSON Layout

info

In general, each position corresponds to a row in the original VCF file.

For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section.

Parsing

info

We've put together a new section that discusses how to parse our JSON files easily using examples in a Python Jupyter notebook and an R version as well. In addition, we have information about how to quickly dump content from our JSON file using a tabix-like utility called JASIX.

{
"header":{
"annotator":"Nirvana 3.0.0-alpha.5+g6c52e247",
"creationTime":"2017-06-14 15:53:13",
"genomeAssembly":"GRCh37",
"dataSources":[
{
"name":"OMIM",
"version":"unknown",
"description":"An Online Catalog of Human Genes and Genetic Disorders",
"releaseDate":"2017-05-03"
},
{
"name":"VEP",
"version":"84",
"description":"BothRefSeqAndEnsembl",
"releaseDate":"2017-01-16"
},
{
"name":"ClinVar",
"version":"20170503",
"description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",
"releaseDate":"2017-05-03"
},
{
"name":"phyloP",
"version":"hg19",
"description":"46 way conservation score between humans and 45 other vertebrates",
"releaseDate":"2009-11-10"
}
],
"samples":[
"NA12878",
"NA12891",
"NA12892"
]
},
FieldTypeNotes
annotatorstringthe name of the annotator and the current version
creationTimestringyyyy-MM-dd hh:mm:ss
genomeAssemblystringsee possible values below
schemaVersionintegerincremented whenever the core structure of the JSON file introduces breaking changes
dataVersionstring
dataSourcesobject arraysee Data Source entry below
samplesstring arraythe order of these sample names will be used throughout the JSON file when enumerating samples

Data Source

FieldTypeNotes
namestring
versionstring
descriptionstringoptional description of the data source
releaseDatestringyyyy-MM-dd

Genome Assemblies

  • GRCh37
  • GRCh38
  • hg19
  • SARSCoV2

Positions

"positions":[
{
"chromosome":"chr2",
"position":48010488,
"repeatUnit":"GGCCCC",
"refRepeatCount":3,
"svEnd":48020488,
"refAllele":"G",
"altAlleles":[
"A",
"GT"
],
"quality":461,
"filters":[
"PASS"
],
"ciPos":[
-170,
170
],
"ciEnd":[
-175,
175
],
"svLength":1000,
"strandBias":1.23,
"jointSomaticNormalQuality":29,
"cytogeneticBand":"2p16.3",
FieldTypeVariant TypeNotes
chromosomestringallexactly as displayed in the vcf
positionintegerallexactly as displayed in the vcf (1-based notation). Range: 1 - 250 million
repeatUnitstringSTRprovided by ExpansionHunter
refRepeatCountintegerSTRprovided by ExpansionHunter
svEndintegerSV
refAllelestringallexactly as displayed in the vcf
altAllelesstring arrayallexactly as displayed in the vcf
qualityfloatallexactly as displayed in the vcf (Normally an integer, but some variant callers use floating point. Has been observed as high as 500k)
filtersstring arrayallexactly as displayed in the vcf
ciPosinteger arraySV
ciEndinteger arraySV
svLengthintegerSV
strandBiasfloatsmall variantprovided by GATK (from SB)
jointSomaticNormalQualityintegerSVprovided by the Manta variant caller (SOMATICSCORE)
cytogeneticBandstringalle.g. 17p13.1

ClinGen

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
FieldTypeNotes
clingenobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
variantTypestringAny of the sequence alterations defined here.
idstringIdentifier from the data source. Alternatively a VID
clinicalInterpretationstringsee possible values below
observedGainsintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
observedLossesintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
validatedboolean
phenotypesstring arrayDescription of the phenotype.
phenotypeIdsstring arrayDescription of the phenotype IDs.
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain
"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
FieldTypeNotes
clingenDosageSensitivityMapobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
haploinsufficiencystringsee possible values below
triplosensitivitystring(same as haploinsufficiency) 
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).
annotationOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap. Specified up to 5 decimal places (Not reported for Insertions).

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely

1000 Genomes (SV)

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.

MITOMAP (SV)

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places

Samples

"samples":[
{
"genotype":"0/1",
"variantFrequencies":[
0.333,
0.5
],
"totalDepth":57,
"genotypeQuality":12,
"copyNumber":3,
"repeatUnitCounts":[
10,
20
],
"alleleDepths":[
10,
20,
30
],
"failedFilter":true,
"splitReadCounts":[
10,
20
],
"pairedEndReadCounts":[
10,
20
],
"isDeNovo":true,
"diseaseAffectedStatuses":[
"-"
],
"artifactAdjustedQualityScore":89.3,
"likelihoodRatioQualityScore":78.2,
"heteroplasmyPercentile":[
23.13,
12.65
]
}
]
FieldTypeVCFNotes
genotypestringGT
variantFrequenciesfloat arrayVF, ADrange: 0 - 1.0. One value per alternate allele
totalDepthintegerDPnon-negative integer values
genotypeQualityintegerGQnon-negative integer values. Typically maxes out at 99
copyNumberintegerCNnon-negative integer values
minorHaplotypeCopyNumberintegerMCNnon-negative integer values
repeatUnitCountsinteger arrayREPCNExpansionHunter-specific
alleleDepthsinteger arrayADnon-negative integer values
failedFilterboolFT
splitReadCountsinteger arraySRManta-specific
pairedEndReadCountsinteger arrayPRManta-specific
isDeNovoboolDN
deNovoQualityfloatDQ
diseaseAffectedStatusesstring arrayDSTExpansionHunter-specific
artifactAdjustedQualityScorefloatAQPEPE-specific. Range: 0 - 100.0
likelihoodRatioQualityScorefloatLQPEPE-specific. Range: 0 - 100.0
lossOfHeterozygosityboolCN, MCN
somaticQualityfloatSQ
heteroplasmyPercentilefloat arrayVFrange: 0 - 100. 2 decimal places. One value per alternate allele
binCountintegerBCnon-negative integer values
Empty Samples

If a sample does not contain any entries, we will create a sample object that contains the isEmpty key. This ensures that sample ordering is preserved while indicating that a sample is intentionally empty.

"samples":[
{
"isEmpty":true
}
],

Variants

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"isReferenceMinorAllele":true,
"isStructuralVariant":true,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"isDecomposedVariant":true,
"isRecomposedVariant":true,
"linkedVids":["2:48010488:GTA:ATC"],
"hgvsg":"NC_000002.11:g.48010488G>A",
"phylopScore":0.459
FieldTypeNotes
vidstringsee Variant Identifiers
chromosomestring
beginint1-based non-negative integer values. Range: 1 - 250 million
endint1-based non-negative integer values. Range: 1 - 250 million
isReferenceMinorAllelebooltrue when this is a reference minor allele
isStructuralVariantbooltrue when the variant is a structural variant
inLowComplexityRegionbooltrue when the variant lies in a low complexity region (gnomAD low complexity regions)
refAllelestringparsimonious representation of the reference allele
altAllelestringparsimonious representation of the alternate allele.
variantTypestringuses Sequence Ontology sequence alterations
isDecomposedVariantbooltrue when the decomposed variant has been used to create another recomposed variant
isRecomposedVariantbooltrue when the variant is recomposed from two or more decomposed variants
linkedVidsstring arraylist of VIDs for variants connecting decomposed and recomposed variants
hgvsgstringHGVS g. notation
phylopScorefloatphyloP conservation score. Range: -14.08 to 6.424
Reference Minor Alleles

Nirvana supports annotating reference minor alleles. In such a case, refAllele will be replaced by the global major allele and altAllele will be replaced with the original reference allele.

Flagging Decomposed & Recomposed Variants

When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with "isDecomposedVariant":true.

Similarly, the recomposed variant will be shown as a new VCF position. This recomposed variant will be flagged with "isRecomposedVariant":true.

Transcripts

"transcripts":[
{
"transcript":"ENST00000445503.1",
"source":"Ensembl",
"bioType":"nonsense_mediated_decay",
"codons":"gGg/gAg",
"aminoAcids":"G/E",
"cdnaPos":"268",
"cdsPos":"116",
"exons":"1/9",
"introns":"1/8",
"proteinPos":"39",
"geneId":"ENSG00000116062",
"hgnc":"MSH6",
"consequence":[
"missense_variant",
"NMD_transcript_variant"
],
"hgvsc":"ENST00000445503.1:c.116G>A",
"hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",
"geneFusion":{
"exon":6,
"intron":5,
"fusions":[
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",
"exon":3,
"intron":2
},
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",
"exon":2,
"intron":1
}
]
},
"isCanonical":true,
"polyPhenScore":0.95,
"polyPhenPrediction":"probably damaging",
"proteinId":"ENSP00000405294.1",
"siftScore":0.61,
"siftPrediction":"tolerated",
"completeOverlap":true
}
]
FieldTypeNotes
transcriptstringtranscript ID. e.g. ENST00000445503.1
sourcestringRefSeq / Ensembl
bioTypestringdescriptions of the biotypes from Ensembl
codonsstring
aminoAcidsstring
cdnaPosstring
cdsPosstring
exonsstringexons affected by the variant
intronsstringintrons affected by the variant
proteinPosstring
geneIdstringgene ID. e.g. ENSG00000116062
hgncstringgene symbol. e.g. MSH6
consequencestring arraySequence Ontology Consequences
hgvscstringHGVS coding nomenclature
hgvspstringHGVS protein nomenclature
geneFusionobjectsee Gene Fusions entry below
isCanonicalbooltrue when this is a canonical transcript
polyPhenScorefloatrange: 0 - 1.0
polyPhenPredictionstringsee possible values below
proteinIdstringprotein ID. E.g. ENSP00000405294.1
siftScorefloatrange: 0 - 1.0
siftPredictionstringsee possible values below
completeOverlapbooltrue when this transcript is completely overlapped by the variant

PolyPhen

  • probably damaging
  • possibly damaging
  • benign
  • unknown

SIFT

  • tolerated
  • deleterious
  • tolerated - low confidence
  • deleterious - low confidence

Amino Acid Conservation

"aminoAcidConservation": {
"scores": [0.34]
}
FieldTypeNotes
aminoAcidConservationobject
scoresobject array of doublespercent conserved with respect to human amino acid residue. Range: 0.01 - 1.00

Gene Fusions

FieldTypeNotes
exonintactual exon where the breakpoint was located
intronintactual intron where the breakpoint was located
fusionsobject arraysee Fusion entry below

Fusion

FieldTypeNotes
exonintactual exon where the other breakpoint was located
intronintactual intron where the other breakpoint was located
hgvscstringHGVS coding nomenclature describing the two genes and the transcripts that are fused

Regulatory Regions

"regulatoryRegions":[
{
"id":"ENSR00001542175",
"type":"promoter",
"consequence":[
"regulatory_region_variant"
]
}
]
FieldTypeNotes
idstring
typestringsee possible values below
consequencestring arraysee possible values below

Regulatory Types

  • CTCF_binding_site
  • enhancer
  • open_chromatin_region
  • promoter
  • promoter_flanking_region
  • TF_binding_site

Regulatory Consequences

  • regulatory_region_variant
  • regulatory_region_ablation
  • regulatory_region_amplification
  • regulatory_region_truncation

ClinVar

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity

1000 Genomes

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.

gnomAD

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.

dbSNP

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs

MITOMAP

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele

Primate AI

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0

REVEL

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0

Splice AI

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place

TOPMed

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters

Genes

"genes":[
{
"name":"MSH6",
"hgncId":7329,
"summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",
/* this is where gene-level data sources can be found e.g. OMIM */
}
]
FieldTypeNotes
namestringHGNC gene symbol
hgncIdintHGNC ID
summarystringshort description of the gene from OMIM

OMIM

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping

gnomAD LoF Gene Metrics

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)

ClinGen Disease Validity

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
FieldTypeNotes
clingenGeneValidityobject
diseaseIdstringMonarch Disease Ontology ID (MONDO)
diseasestringdisease label
classificationstringsee below for possible values
classificationDatestringyyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
- - - - \ No newline at end of file diff --git a/3.17/index.html b/3.17/index.html deleted file mode 100644 index 6c44a782..00000000 --- a/3.17/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Introduction | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs (including CNVs)). It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation.

The inputs to Nirvana are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Nirvana handles multiple alternate alleles and multiple samples with ease.

The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily.

Fun Fact

Nirvana is a backronym for NImble and Robust VAriant aNnotAtor

What does Nirvana annotate?

We use Sequence Ontology consequences to describe how each variant impacts a given transcript:

In addition, we also use external data sources to provide additional context for each variant:

Licensing

Code

Nirvana source code is provided under the GPLv3 license. Nirvana includes several third-party packages provided under other open source licenses; please see Dependencies for additional details.

Data

The data used by Nirvana is publicly available, however some data sources have special restrictions on use by non-academic entities.

Nirvana Team

Active Team

The Nirvana team works on the core functionality and AWS annotation services, in addition to keeping the annotation data sources up-to-date.

Current members of the Nirvana team are listed in alphabetical order below.

Joseph Platzer

Test Lead. Joins Nirvana with a history of building sequencing tools and keeping the customer first.

Michael Strömberg

Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it.

Rajat Shuvro Roy

Lead developer. Loves to speed up things and make services available to all interested users.

Honorary Alumni

Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things.

Haochen Li

Detail-oriented quick thinker that keeps cool even in the most stressful situations. Now working as a Senior Bioinformatics Data Scientist at GRAIL.

Julien Lajugie

Julien is a legend around these parts. When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place.

Shuli Kang

Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies.

Yu Jiang

Biostatistics genius from Duke University before joining our team at Illumina. Now working as a Research Engineer at Facebook AI Research.
- - - - \ No newline at end of file diff --git a/3.17/introduction/covid19/index.html b/3.17/introduction/covid19/index.html deleted file mode 100644 index 996a23fa..00000000 --- a/3.17/introduction/covid19/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Annotating COVID-19 | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Annotating COVID-19

The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.

However, nothing in our architecture prevents us from supporting other genomes. Earlier this year, we had an opportunity to put that statement to the test - we added support for annotating the SARS-CoV-2 genome, the virus that causes the COVID-19 disease.

In addition to normal transcript annotation, we also supply:

  • allele frequencies
  • protein domains
SARS-CoV-2 Galaxy Project

The allele frequencies used by Nirvana were provided by the SARS-CoV-2 Galaxy Project. This is an international effort that provides ongoing analysis of COVID-19 using Galaxy, BioConda, and public research infrastructures.

Getting Nirvana

If you don't have Nirvana already, please consult our Getting Started page first.

Downloading the COVID-19 data files

Here's a data zip file containing new gene models, reference, and external data sources for SARS-CoV-2:

Just go to the directory that contains your Nirvana Data directory.

cd ~/Nirvana
curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip
unzip Covid19Data.zip

Download a COVID-19 VCF file

Here's a COVID-19 VCF file you can play around with:

curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz

Running Nirvana

Once you have downloaded the data sets, use the following command to annotate your VCF:

dotnet bin/Release/netcoreapp2.1/Nirvana.dll \
-c Data/Cache/SARS-CoV-2/SARS-CoV-2 \
--sd Data/SupplementaryAnnotation/SARS-CoV-2 \
-r Data/References/SARS-CoV-2.ASM985889v3.dat \
-i Covid19Mutations.vcf.gz \
-o Covid19Mutations
  • the -c argument specifies the cache prefix
  • the --sd argument specifies the supplementary annotation directory
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input VCF path
  • the -o argument specifies the output filename prefix

When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:

---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:00.0
SA Position Scan 00:00:00.0 1763

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
NC_045512 00:00:00.0 00:00:00.1 173

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:00.0 2.0 %
Preload 00:00:00.0 0.3 %
Annotation 00:00:00.1 6.0 %

Time: 00:00:01.5

The output will be a JSON file called Covid19Mutations.json.gz. Here's the full JSON file.

Investigating the Results

Here's an example of what a COVID-19 variant looks like in the JSON output:

{
"chromosome":"NC_045512.2",
"position":27323,
"refAllele":"C",
"altAlleles":[
"T"
],
"filters":[
"PASS"
],
"proteinDomains":[
{
"start":27202,
"end":27384,
"proteinId":"YP_009724394.1",
"domainId":"cl13556",
"domainName":"Sars6 super family",
"reciprocalOverlap":0.00546,
"annotationOverlap":0.00546
}
],
"variants":[
{
"vid":"NC_045512.2-27323-C-T",
"chromosome":"NC_045512.2",
"begin":27323,
"end":27323,
"refAllele":"C",
"altAllele":"T",
"variantType":"SNV",
"hgvsg":"NC_045512.2:g.27323C>T",
"alleleFrequency":{
"refAllele":"C",
"altAllele":"T",
"allAc":8,
"allAn":1058,
"allAf":0.007561
},
"transcripts":[
{
"transcript":"YP_009724394.1",
"source":"RefSeq",
"bioType":"protein_coding",
"codons":"tCt/tTt",
"aminoAcids":"S/F",
"cdnaPos":"122",
"cdsPos":"122",
"exons":"1/1",
"proteinPos":"41",
"geneId":"43740572",
"hgnc":"ORF6",
"consequence":[
"missense_variant"
],
"hgvsc":"YP_009724394.1:c.122C>T",
"hgvsp":"YP_009724394.1:p.(Ser41Phe)",
"proteinId":"YP_009724394.1"
},
{
"transcript":"YP_009724395.1",
"source":"RefSeq",
"bioType":"protein_coding",
"geneId":"43740573",
"hgnc":"ORF7a",
"consequence":[
"upstream_gene_variant"
],
"proteinId":"YP_009724395.1"
}
]
}
]
}
- - - - \ No newline at end of file diff --git a/3.17/introduction/dependencies/index.html b/3.17/introduction/dependencies/index.html deleted file mode 100644 index 8e1b44b3..00000000 --- a/3.17/introduction/dependencies/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Dependencies | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Dependencies

All of the following dependencies have been included in this repository.

NameLicenseUsage
Amazon.LambdaApacheAWS extensions for .NET CLI
AWSSDKApacheAWS Lambda, S3, SNS support
Json.NETMITJASIX utility
libdeflateMITBlockCompression library
MoqBSDMocking framework for unit tests
NDesk.OptionsMIT/X11CommandLine library
xUnitApacheUnit testing framework
zlib-ngzlibBlockCompression library
zstdBSDBlockCompression library
- - - - \ No newline at end of file diff --git a/3.17/introduction/getting-started/index.html b/3.17/introduction/getting-started/index.html deleted file mode 100644 index d58aba74..00000000 --- a/3.17/introduction/getting-started/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Getting Started | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Getting Started

Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.

tip

Nirvana currently uses .NET Core 3.1 or later. Please make sure that you have the most current runtime from the .NET Core downloads page.

Quick Start

If you want to get started right away, we've created a script that downloads Nirvana, compiles it, and starts annotating a test file:

curl -O https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh
bash ./TestNirvana.sh

We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X.

Getting Nirvana

Compile from Source

The following will grab the latest version of Nirvana from GitHub and compile it using the .NET Core compiler:

git clone https://github.com/Illumina/Nirvana.git
cd Nirvana
dotnet build -c Release

GitHub Release Notes

Alternatively, you can grab the latest binaries from our GitHub Releases page:

mkdir -p Nirvana/Data
cd Nirvana
unzip Nirvana-3.16.1-dotnet-3.1.0.zip

Docker

You can find us on Docker Hub under annotation/nirvana:

caution

We think Docker is fantastic. However, because our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Nirvana in Docker.

mkdir -p Nirvana/Data
cd Nirvana
docker pull annotation/nirvana:3.14

For Docker, we have special instructions for running the Downloader:

sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \
/opt/nirvana/Downloader.dll --ga GRCh37 -o /scratch

Similarly, we have special instructions for running Nirvana (Here's a toy VCF in case you need it):

sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \
/opt/nirvana/Nirvana.dll -c /scratch/Cache/GRCh37/Both \
-r /scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \
--sd /scratch/SupplementaryAnnotation/GRCh37 \
-i /scratch/HiSeq.10000.vcf.gz -o /scratch/HiSeq

Downloading the data files

To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:

dotnet bin/Release/netcoreapp3.1/Downloader.dll \
--ga GRCh37 \
-o Data
  • the --ga argument specifies the genome assembly which can be GRCh37, GRCh38, or both.
  • the -o argument specifies the output directory
Glitches in the Matrix

Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. If you see that a file was marked truncated, try fixing the root cause and running the downloader again.

tip

From time to time, you can re-run the Downloader to get the latest annotation files. It will only download the files that changed.

Download a test VCF file

Here's a toy VCF file you can play around with:

curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz

Running Nirvana

Once you have downloaded the data sets, use the following command to annotate your VCF:

dotnet bin/Release/netcoreapp3.1/Nirvana.dll \
-c Data/Cache/GRCh37/Both \
--sd Data/SupplementaryAnnotation/GRCh37 \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i HiSeq.10000.vcf.gz \
-o HiSeq.10000
  • the -c argument specifies the cache prefix
  • the --sd argument specifies the supplementary annotation directory
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input VCF path
  • the -o argument specifies the output filename prefix

When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:

---------------------------------------------------------------------------
Nirvana (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.16.1
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:01.2
SA Position Scan 00:00:00.1 55,270

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
chr1 00:00:00.1 00:00:01.5 6,323

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:01.3 23.9 %
Preload 00:00:00.1 2.9 %
Annotation 00:00:01.5 27.2 %

Peak memory usage: 1.434 GB
Time: 00:00:05.2

The output will be a JSON file called HiSeq.10000.json.gz. Here's the full JSON file.

- - - - \ No newline at end of file diff --git a/3.17/introduction/parsing-json/index.html b/3.17/introduction/parsing-json/index.html deleted file mode 100644 index fbf5933b..00000000 --- a/3.17/introduction/parsing-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Parsing Nirvana JSON | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Parsing Nirvana JSON

Why JSON?

VCF is a fantastic file format that was developed during the methods development activities within the 1000 Genomes Project. Prior to that, variant callers were outputting information into a variety of tab-delimited formats. Some were based on existing standards (like GFF), while most were proprietary. The primary intent of VCF files was to provide a human-readable, standardized representation of genetic variants. Similar to SAM/BAM files, VCF files used BCF files as their binary counterpart.

In the very beginning, Nirvana offered VCF output for annotation. While many variant annotators offer an option to output VCF files, one could argue if they are still human-readable. Here's an example from a VCF file produced by VEP v102:

chr3    107840527   .   A   ATTTTTTTTT,AT,ATTTTTTTT 153.51  PASS    AN=6;MQ=244.10;
SOR=1.739;QD=2.24;DP=57;AF=0.500,0.167,0.333;FS=0.000;AC=3,1,2;CSQ=TTTTTTTTT|
intron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|
Transcript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-132_622-124dup|||||||
rs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||
|||||||||0.792|-0.109757,T|intron_variant&non_coding_transcript_variant|MODIFIER|
LINC00635|ENSG00000241469|Transcript|ENST00000608506.6|lncRNA||4/4|
ENST00000608506.6:n.622-124dup|||||||rs35564779||-1||HGNC|HGNC:27184|||5|||||||||
Ensembl||||||||||||||||||||||||||||||||||||||||||||0.932|-0.075622,TTTTTTTT|
intron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|
Transcript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-131_622-124dup|||||||
rs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||
|||||||||0.808|-0.105490,TTTTTTTTT|intron_variant&non_coding_transcript_variant|
MODIFIER|LINC00636|ENSG00000240423|Transcript|ENST00000649048.1|lncRNA||2/3|
ENST00000649048.1:n.179+5223_179+5231dup|||||||rs35564779||1||HGNC|HGNC:27702|||||||||
|||Ensembl||||||||||||||||||||||||||||||||||||||||||||0.792|-0.109757, (etc.)

Originally Nirvana used the same VCF notation as VEP uses above. The problem is that you end up with a large amount of text that is difficult to parse out by eye and requires the use of several delimiters to divide the information into useful segments. When we originally annotated this variant using VEP, this single variant used 488,909 bytes (almost ½ MB). Surprisingly, we found that this broke some downstream tools that had preconceived notions of how long a single line could be in a VCF file.

caution

Whitespace is not allowed in the VCF INFO field. This means that if you wanted to express a gene description from OMIM: "HRAS PROTOONCOGENE, GTPase; HRAS", you would need to replace the spaces with something else like an underscore. You would also need to hope that the VCF parser correctly handles embedded commas and semicolons in the description.

What do other annotators use?

Unfortunately, file format standardization has not made it all the way to variant annotation yet. The GA4GH Annotation group had many discussions on the topic several years ago. While a set of JSON schemas was created in that effort, there wasn't enough momentum to make this a new standard.

While there is some overlap in general file formats (JSON vs VCF vs TSV), none of those are compatible with each other. I.e. the VCF representation in VEP and snpEff is different just like the JSON schemas used by VEP, Nirvana, and GA4GH are different.

SourceFormats
VEPJSON, TSV, VCF
snpEffVCF
AnnovarTSV
NirvanaJSON
GA4GHJSON

We are interested in working together with others in the annotation space to develop a common annotation file format. Our belief is that this would accelerate methods development and benchmarking activities within annotation much in the same way the creation of SAM/BAM & VCF/BCF accelerated secondary analysis development.

What do we gain by using JSON?

  • JSON files are better at showing hierarchical and other relational data. For example when we output ClinVar data, we often want to output several overlapping RCV entries (variants coupled with a disease phenotype). In each, we would want to output a list of phenotypes, clinical significance, etc. That is difficult to accomplish in a human-readable way using VCF files (without resorting to a growing lexicon of delimiters).
  • JSON files use JavaScript data types, while VCF INFO fields don't directly have data types. Instead, external metadata located in the VCF header is required to indicate the preferred data type.
  • JSON files are more verbose. Often this is seen as a negative, but compression largely compensates for this. Given the following excerpt from the VCF example above HGNC:27184|||5|||||||||Ensembl it's not immediately obvious what the 5 refers to (without checking the VCF header for details). With JSON files, you would always see a key name associated with a value.
  • JSON files can be natively imported into different search and analytics solutions like Elasticsearch and Snowflake.
  • JSON strings do not have any limitations on the use of whitespace.

Parsing JSON

Our JSON files are organized similarly to original VCF variants:

Nirvana JSON files can get very large and sometimes we receive feedback that a bioinformatician tried to read the JSON file into Python or R resulting in a program that ran out of available RAM. This happens because those parsers try to load everything into memory all at once.

To get around those issues, we play some clever tricks with newlines that enable our users to parse our JSON files quickly and efficiently.

Organization

Our JSON file is arranged as follows:

  • the header section is located on the first line
  • each line after that corresponds to a position (same as a row in a VCF file)
    • until you reach the genes section ],"genes":[
  • each line after that corresponds to a gene
    • until you reach the end ]}

Knowing this, you can load each position line as an independent JSON object and extract the information you need.

Jupyter Notebook

To demonstrate this, we have put together a Jupyter notebook demonstrating how to do this in Python and an R version as well.

JASIX

One of the tools that we really like in the VCF ecosystem is tabix. Unfortunately, tabix only works for tab-delimited file formats. As a result, we created a similar tool for Nirvana JSON files called JASIX.

Here's an example of how you might use JASIX:

dotnet bin/Release/netcoreapp2.1/Jasix.dll -i dragen.json.gz -q chr1:942450-942455
  • the -i argument specifies the Nirvana JSON path
  • the -q argument specifies a genomic range (you can use as many of these as you want)

JASIX also includes additional options for showing the Nirvana header or for extracting different sections (like the genes section).

The output from JASIX is a compliant JSON object shown in pretty-printed form:

{"positions":[
{
"chromosome": "chr1",
"position": 942451,
"refAllele": "T",
"altAlleles": [
"C"
],
"quality": 484.23,
"filters": [
"PASS"
],
"cytogeneticBand": "1p36.33",
"samples": [
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 21,
"genotypeQuality": 60,
"alleleDepths": [
0,
21
]
},
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 32,
"genotypeQuality": 93,
"alleleDepths": [
0,
32
]
},
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 36,
"genotypeQuality": 105,
"alleleDepths": [
0,
36
]
}
],
"variants": [
{
"vid": "1-942451-T-C",
"chromosome": "chr1",
"begin": 942451,
"end": 942451,
"refAllele": "T",
"altAllele": "C",
"variantType": "SNV",
"hgvsg": "NC_000001.11:g.942451T>C",
"phylopScore": -0.1,
"clinvar": [
{
"id": "VCV000836156.1",
"reviewStatus": "criteria provided, single submitter",
"significance": [
"uncertain significance"
],
"refAllele": "T",
"altAllele": "T",
"lastUpdatedDate": "2020-08-20"
},
{
"id": "RCV001037211.1",
"variationId": 836156,
"reviewStatus": "criteria provided, single submitter",
"alleleOrigins": [
"germline"
],
"refAllele": "T",
"altAllele": "T",
"phenotypes": [
"not provided"
],
"medGenIds": [
"CN517202"
],
"significance": [
"uncertain significance"
],
"lastUpdatedDate": "2020-08-20",
"pubMedIds": [
"28492532"
]
}
],
"dbsnp": [
"rs6672356"
],
"gnomad": {
"coverage": 25,
"allAf": 0.999855,
"allAn": 123742,
"allAc": 123724,
"allHc": 61853,
"afrAf": 0.999416,
"afrAn": 10278,
"afrAc": 10272,
"afrHc": 5133,
"amrAf": 0.99995,
"amrAn": 20008,
"amrAc": 20007,
"amrHc": 10003,
"easAf": 1,
"easAn": 6054,
"easAc": 6054,
"easHc": 3027,
"finAf": 1,
"finAn": 8696,
"finAc": 8696,
"finHc": 4348,
"nfeAf": 0.999899,
"nfeAn": 49590,
"nfeAc": 49585,
"nfeHc": 24790,
"asjAf": 1,
"asjAn": 7208,
"asjAc": 7208,
"asjHc": 3604,
"sasAf": 0.99967,
"sasAn": 18160,
"sasAc": 18154,
"sasHc": 9074,
"othAf": 1,
"othAn": 3748,
"othAc": 3748,
"othHc": 1874,
"maleAf": 0.9999,
"maleAn": 69780,
"maleAc": 69773,
"maleHc": 34883,
"femaleAf": 0.999796,
"femaleAn": 53962,
"femaleAc": 53951,
"femaleHc": 26970,
"controlsAllAf": 0.999815,
"controlsAllAn": 48654,
"controlsAllAc": 48645
},
"oneKg": {
"allAf": 1,
"afrAf": 1,
"amrAf": 1,
"easAf": 1,
"eurAf": 1,
"sasAf": 1,
"allAn": 5008,
"afrAn": 1322,
"amrAn": 694,
"easAn": 1008,
"eurAn": 1006,
"sasAn": 978,
"allAc": 5008,
"afrAc": 1322,
"amrAc": 694,
"easAc": 1008,
"eurAc": 1006,
"sasAc": 978
},
"primateAI": [
{
"hgnc": "SAMD11",
"scorePercentile": 0.87
}
],
"revel": {
"score": 0.145
},
"topmed": {
"allAf": 0.999809,
"allAn": 125568,
"allAc": 125544,
"allHc": 62760
},
"transcripts": [
{
"transcript": "ENST00000420190.6",
"source": "Ensembl",
"bioType": "protein_coding",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"downstream_gene_variant"
],
"proteinId": "ENSP00000411579.2"
},
{
"transcript": "ENST00000342066.7",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "1110",
"cdsPos": "1027",
"exons": "10/14",
"proteinPos": "343",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000342066.7:c.1027T>C",
"hgvsp": "ENSP00000342313.3:p.(Trp343Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000342313.3",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000618181.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "732",
"cdsPos": "652",
"exons": "7/11",
"proteinPos": "218",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618181.4:c.652T>C",
"hgvsp": "ENSP00000480870.1:p.(Trp218Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000480870.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000622503.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "1110",
"cdsPos": "1030",
"exons": "10/14",
"proteinPos": "344",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000622503.4:c.1030T>C",
"hgvsp": "ENSP00000482138.1:p.(Trp344Arg)",
"isCanonical": true,
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000482138.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000618323.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "cTg/cCg",
"aminoAcids": "L/P",
"cdnaPos": "712",
"cdsPos": "632",
"exons": "8/12",
"proteinPos": "211",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618323.4:c.632T>C",
"hgvsp": "ENSP00000480678.1:p.(Leu211Pro)",
"polyPhenScore": 0,
"polyPhenPrediction": "unknown",
"proteinId": "ENSP00000480678.1",
"siftScore": 0.03,
"siftPrediction": "deleterious - low confidence"
},
{
"transcript": "ENST00000616016.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "ccT/ccC",
"aminoAcids": "P",
"cdnaPos": "944",
"cdsPos": "864",
"exons": "9/13",
"proteinPos": "288",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"synonymous_variant"
],
"hgvsc": "ENST00000616016.4:c.864T>C",
"hgvsp": "ENST00000616016.4:c.864T>C(p.(Pro288=))",
"proteinId": "ENSP00000478421.1"
},
{
"transcript": "ENST00000618779.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "921",
"cdsPos": "841",
"exons": "9/13",
"proteinPos": "281",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618779.4:c.841T>C",
"hgvsp": "ENSP00000484256.1:p.(Trp281Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000484256.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000616125.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "783",
"cdsPos": "703",
"exons": "8/12",
"proteinPos": "235",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000616125.4:c.703T>C",
"hgvsp": "ENSP00000484643.1:p.(Trp235Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000484643.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000620200.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "cTg/cCg",
"aminoAcids": "L/P",
"cdnaPos": "427",
"cdsPos": "347",
"exons": "5/9",
"proteinPos": "116",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000620200.4:c.347T>C",
"hgvsp": "ENSP00000484820.1:p.(Leu116Pro)",
"polyPhenScore": 0,
"polyPhenPrediction": "unknown",
"proteinId": "ENSP00000484820.1",
"siftScore": 0.16,
"siftPrediction": "tolerated - low confidence"
},
{
"transcript": "ENST00000617307.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "867",
"cdsPos": "787",
"exons": "9/13",
"proteinPos": "263",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000617307.4:c.787T>C",
"hgvsp": "ENSP00000482090.1:p.(Trp263Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000482090.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "NM_152486.2",
"source": "RefSeq",
"bioType": "protein_coding",
"codons": "Cgg/Cgg",
"aminoAcids": "R",
"cdnaPos": "1107",
"cdsPos": "1027",
"exons": "10/14",
"proteinPos": "343",
"geneId": "148398",
"hgnc": "SAMD11",
"consequence": [
"synonymous_variant"
],
"hgvsc": "NM_152486.2:c.1027T>C",
"hgvsp": "NM_152486.2:c.1027T>C(p.(Arg343=))",
"isCanonical": true,
"proteinId": "NP_689699.2"
},
{
"transcript": "ENST00000341065.8",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "750",
"cdsPos": "751",
"exons": "8/12",
"proteinPos": "251",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000341065.8:c.750T>C",
"hgvsp": "ENSP00000349216.4:p.(Trp251Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000349216.4",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000455979.1",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "507",
"cdsPos": "508",
"exons": "4/7",
"proteinPos": "170",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000455979.1:c.507T>C",
"hgvsp": "ENSP00000412228.1:p.(Trp170Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000412228.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000478729.1",
"source": "Ensembl",
"bioType": "processed_transcript",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000474461.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "389",
"exons": "3/4",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000474461.1:n.389T>C"
},
{
"transcript": "ENST00000466827.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "191",
"exons": "2/2",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000466827.1:n.191T>C"
},
{
"transcript": "ENST00000464948.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "286",
"exons": "1/2",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000464948.1:n.286T>C"
},
{
"transcript": "NM_015658.3",
"source": "RefSeq",
"bioType": "protein_coding",
"geneId": "26155",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
],
"isCanonical": true,
"proteinId": "NP_056473.2"
},
{
"transcript": "ENST00000483767.5",
"source": "Ensembl",
"bioType": "retained_intron",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000327044.6",
"source": "Ensembl",
"bioType": "protein_coding",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
],
"isCanonical": true,
"proteinId": "ENSP00000317992.6"
},
{
"transcript": "ENST00000477976.5",
"source": "Ensembl",
"bioType": "retained_intron",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000496938.1",
"source": "Ensembl",
"bioType": "processed_transcript",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
}
]
}
]
}
]}
- - - - \ No newline at end of file diff --git a/3.17/utilities/jasix/index.html b/3.17/utilities/jasix/index.html deleted file mode 100644 index bcb0215c..00000000 --- a/3.17/utilities/jasix/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Jasix | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.17

Jasix

Overview

The Jasix index is aimed at providing TABIX-like indexing capabilities for the Nirvana JSON output.

Creating the Jasix index

The Jasix index (which comes in a .jsi file) is generated on-the-fly with Nirvana output. It can also be generated independently by running the Jasix command line utility on the JSON output file. Please note that the Jasix utility can only consume JSON files that follow the Nirvana JSON output format. The following code blocks demonstrate the help menu and index-generating functionality of Jasix.

Example

dotnet Jasix.dll -h
USAGE: dotnet Jasix.dll -i in.json.gz [options]
Indexes a Nirvana annotated JSON file

OPTIONS:
--header, -t print also the header lines
--only-header, -H print only the header lines
--chromosomes, -l list chromosome names
--index, -c create index
--in, -i <VALUE> input
--out, -o <VALUE> compressed output file name (default:console)
--query, -q <VALUE> query range
--section, -s <VALUE> complete section (positions or genes) to output
--help, -h displays the help menu
--version, -v displays the version
dotnet Jasix.dll --index -i input.json.gz
---------------------------------------------------------------------------
Jasix (c) 2017 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 2.0.0
---------------------------------------------------------------------------

Ref Sequence chrM indexed in 00:00:00.2
Ref Sequence chr1 indexed in 00:00:05.8
Ref Sequence chr2 indexed in 00:00:06.0
.
.
.
Peak memory usage: 28.5 MB
Time: 00:01:14.8

Querying the index

The Jasix query format is chr:start-end. If end is not provided, Jasix assumes end=start. If only chr is provided, all entries for that chromosome will be returned.

dotnet Jasix.dll -i input.json.gz chrM:5000-7000
{
"positions":[
{
"chromosome":"chrM",
"refAllele":"C",
"position":5581,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"T"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1625,
"genotypeQuality":1,
"alleleDepths":[
0,
1625
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"T",
"refAllele":"C",
"begin":5581,
"chromosome":"chrM",
"end":5581,
"variantType":"SNV",
"vid":"MT:5581:T"
}
]
},
{
"chromosome":"chrM",
"refAllele":"A",
"position":6267,
"quality":1637.00,
"filters":[
"LowGQXHetSNP"
],
"altAlleles":[
"G"
],
"samples":[
{
"variantFreq":0.6873,
"totalDepth":323,
"genotypeQuality":1,
"alleleDepths":[
101,
222
],
"genotype":"0/1"
}
],
"variants":[
{
"altAllele":"G",
"refAllele":"A",
"begin":6267,
"chromosome":"chrM",
"end":6267,
"variantType":"SNV",
"vid":"MT:6267:G"
}
]
}
]
}

The default output stream is Console. However, if an output filename is provided, Jasix outputs the results to that file in a bgzip compressed format. The output is always a valid JSON entry. If requested (via the -t option), the header of the indexed file will be provided. Multiple queries can be submitted in the same command, and the output will contain them within the same "positions" block in the order of the submitted queries (Warning: if the queries are out of order or overlapping, the output will be out of order and intersecting).

dotnet Jasix.dll -i input.json.gz  -q chrM:5000-7000 -q chrM:8500-9500 -t
{
"header":{
"annotator":"Illumina Annotation Engine 1.6.2.0",
"creationTime":"2017-08-30 11:42:57",
"genomeAssembly":"GRCh37",
"schemaVersion":6,
"dataVersion":"84.24.39",
"dataSources":[
{
"name":"VEP",
"version":"84",
"description":"Ensembl",
"releaseDate":"2017-01-16"
}
],
"samples":[
"Mother"
]
},
"positions":[
{
"chromosome":"chrM",
"refAllele":"C",
"position":5581,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"T"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1625,
"genotypeQuality":1,
"alleleDepths":[
0,
1625
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"T",
"refAllele":"C",
"begin":5581,
"chromosome":"chrM",
"end":5581,
"variantType":"SNV",
"vid":"MT:5581:T"
}
]
},
{
"chromosome":"chrM",
"refAllele":"A",
"position":6267,
"quality":1637.00,
"filters":[
"LowGQXHetSNP"
],
"altAlleles":[
"G"
],
"samples":[
{
"variantFreq":0.6873,
"totalDepth":323,
"genotypeQuality":1,
"alleleDepths":[
101,
222
],
"genotype":"0/1"
}
],
"variants":[
{
"altAllele":"G",
"refAllele":"A",
"begin":6267,
"chromosome":"chrM",
"end":6267,
"variantType":"SNV",
"vid":"MT:6267:G"
}
]
},
{
"chromosome":"chrM",
"refAllele":"G",
"position":8702,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"A"
],
"samples":[
{
"variantFreq":0.9987,
"totalDepth":1534,
"genotypeQuality":1,
"alleleDepths":[
2,
1532
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"A",
"refAllele":"G",
"begin":8702,
"chromosome":"chrM",
"end":8702,
"variantType":"SNV",
"vid":"MT:8702:A"
}
]
},
{
"chromosome":"chrM",
"refAllele":"G",
"position":9378,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"A"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1018,
"genotypeQuality":1,
"alleleDepths":[
0,
1018
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"A",
"refAllele":"G",
"begin":9378,
"chromosome":"chrM",
"end":9378,
"variantType":"SNV",
"vid":"MT:9378:A"
}
]
}
]
}

Extracting a section

The Nirvana JSON file has three sections: header, positions and genes. Header can be printed using the -H option. If you are interested in only the positions or genes section, you can use the -s or --section option.

dotnet Jasix.dll -i input.json.gz  -s genes
[
{
"name": "ABCB10",
"omim": [
{
"mimNumber": 605454,
"geneName": "ATP-binding cassette, subfamily B, member 10"
}
]
},
{
"name": "ABCD3",
"omim": [
{
"mimNumber": 170995,
"geneName": "ATP-binding cassette, subfamily D, member 3 (peroxisomal membrane protein 1, 70kD)",
"description": "The ABCD3 gene encodes a peroxisomal membrane transporter involved in the transport of branched-chain fatty acids and C27 bile acids into the peroxisome; the latter function is a crucial step in bile acid biosynthesis (summary by Ferdinandusse et al., 2015).",
"phenotypes": [
{
"mimNumber": 616278,
"phenotype": "?Bile acid synthesis defect, congenital, 5",
"mapping": "molecular basis of the disorder is known",
"inheritances": [
"Autosomal recessive"
],
"comments": [
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
}
]
- - - - \ No newline at end of file diff --git a/3.18/core-functionality/canonical-transcripts/index.html b/3.18/core-functionality/canonical-transcripts/index.html deleted file mode 100644 index b0a39815..00000000 --- a/3.18/core-functionality/canonical-transcripts/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Canonical Transcripts | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Canonical Transcripts

Overview

One of the more polarizing topics within annotation is the notion of canonical transcripts. Because of alternative splicing, we often have several transcripts for each gene. In the human genome, there are an average of 3.4 transcripts per gene (Tung, 2020). As scientists, we seem to have a need for identifying a representative example of a gene - even if there's no biological basis for the motivation.

Golden Helix Blog

A few years ago, the guys over at Golden Helix wrote an excellent post about the pitfalls and issues surrounding the identification of canonical transcripts: What’s in a Name: The Intricacies of Identifying Variants.

In Nirvana, we wanted to identify an algorithm for determining the canonical transcript and apply it consistently to all of our transcript data sources.

Known Algorithms

UCSC

UCSC publishes a list of canonical transcripts in its knownCanonical table which is available via the TableBrowser. Of the RefSeq data sources, it was the only one we could find that provided canonical transcripts:

The canonical transcript is defined as either the longest CDS, if the gene has translated transcripts, or the longest cDNA.

If you were to implement this and compare it with the knownCanonical table, you would see a lot of exceptions to the rule.

Ensembl

The Ensembl glossary states:

The canonical transcript is used in the gene tree analysis in Ensembl and does not necessarily reflect the most biologically relevant transcript of a gene. For human, the canonical transcript for a gene is set according to the following hierarchy:

  1. Longest CCDS translation with no stop codons.
  2. If no (1), choose the longest Ensembl/Havana merged translation with no stop codons.
  3. If no (2), choose the longest translation with no stop codons.
  4. If no translation, choose the longest non-protein-coding transcript.

ACMG

From the ACMG Guidelines for the Interpretation of Sequence Variants:

A reference transcript for each gene should be used and provided in the report when describing coding variants. The transcript should represent either the longest known transcript and/or the most clinically relevant transcript.

ClinVar

From the ClinVar paper:

When there are multiple transcripts for a gene, ClinVar selects one HGVS expression to construct a preferred name. By default, this selection is based on the first reference standard transcript identified by the RefSeqGene/LRG (Locus Reference Genomic) collaboration.

Unified Approach

Our approach is almost identical to the one Golden Helix discussed in their article:

  1. If we're looking at RefSeq, only consider NM & NR transcripts as candidates for canonical transcripts.
  2. Sort the transcripts in the following order:
    1. Locus Reference Genomic (LRG) entries occur before non-LRG entries
    2. Descending CDS length
    3. Descending transcript length
    4. Ascending accession number
  3. Grab the first entry
- - - - \ No newline at end of file diff --git a/3.18/core-functionality/gene-fusions/index.html b/3.18/core-functionality/gene-fusions/index.html deleted file mode 100644 index ee69e777..00000000 --- a/3.18/core-functionality/gene-fusions/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Gene Fusion Detection | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Gene Fusion Detection

Overview

Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed.

Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana.

The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:

Publication

Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. Landscape of gene fusions in epithelial cancers: seq and ye shall find. Genome Med 7, 129 (2015)

Approach

Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. Let's consider two transcripts, NM_014206.3 (TMEM258) and NM_013402.4 (FADS1). Both of these genes are on the reverse strand in the genome. The vertical bar indicates the breakpoint where these transcripts are fused:

TMEM258 &amp; FADS1 transcripts

The above explains where the transcripts are fused together, but it doesn't explain in which orientation. By using the directionality encoded in the translocation breakend, we can rearrange these two transcripts in four ways:

TMEM258 &amp; FADS1 gene fusions

Only two of the combinations yield a fusion that contains both the transcription start site (TSS) and the stop codon. In one case, we can even detect an in-frame gene fusion.

Interpreting translocation breakends

At first glance, translocation breakends are a bit daunting. However, once you understand how they work, they're actually quite simple. For more information, we recommend reading section 5.4 in the VCF 4.2 specification.

| REF | ALT | Meaning |
| --- | --- | --- |
| s | t[p[ | piece extending to the right of p is joined after t |
| s | t]p] | reverse comp piece extending left of p is joined after t |
| s | ]p]t | piece extending to the left of p is joined before t |
| s | [p[t | reverse comp piece extending right of p is joined before t |

Variant Types

Specifically we can identify gene fusions from the following structural variant types:

  • deletions (<DEL>)
  • tandem_duplications (<DUP:TANDEM>)
  • inversions (<INV>)
  • translocation breakpoints (AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[)

Criteria

The following criteria must be met for Nirvana to identify a gene fusion:

  1. After accounting for gene orientation and genomic rearrangements, both transcripts must have the same orientation
  2. Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)
  3. Both transcripts must belong to different genes
  4. Both transcripts cannot have a coding region that already overlaps without the variant (i.e. in cases where two genes naturally overlap, we don't want to call a gene fusion)

ETV6/RUNX1 Example

ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). Patients with this translocation are associated with a good prognosis and excellent response to treatment.

VCF

Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
chr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND
chr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND
chr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND
chr21 36420865 . C [chr12:12026270[C . PASS SVTYPE=BND

When you put these calls together, the resulting genomic rearrangement looks something like this:

JSON Output

The annotation for the first variant in the VCF looks like this:

{
"chromosome": "chr12",
"position": 12026270,
"refAllele": "C",
"altAlleles": [
"[chr21:36420865[C"
],
"filters": [
"PASS"
],
"cytogeneticBand": "12p13.2",
"clingen": [
{
"chromosome": "12",
"begin": 173786,
"end": 34835837,
"variantType": "copy_number_gain",
"id": "nsv995956",
"clinicalInterpretation": "pathogenic",
"phenotypes": [
"Decreased calvarial ossification",
"Delayed gross motor development",
"Feeding difficulties",
"Frontal bossing",
"Morphological abnormality of the central nervous system",
"Patchy alopecia"
],
"phenotypeIds": [
"HP:0002007",
"HP:0002011",
"HP:0002194",
"HP:0002232",
"HP:0005474",
"HP:0011968",
"MedGen:C0232466",
"MedGen:C1862862",
"MedGen:CN001816",
"MedGen:CN001820",
"MedGen:CN001989",
"MedGen:CN004852"
],
"observedGains": 1,
"validated": true
}
],
"variants": [
{
"vid": "12-12026270-C-[chr21:36420865[C",
"chromosome": "chr12",
"begin": 12026270,
"end": 12026270,
"isStructuralVariant": true,
"refAllele": "C",
"altAllele": "[chr21:36420865[C",
"variantType": "translocation_breakend",
"cosmicGeneFusions": [
{
"id": "COSF2245",
"numSamples": 249,
"geneSymbols": [
"ETV6",
"RUNX1"
],
"hgvsr": "ENST00000396373.4(ETV6):r.1_1283::ENST00000300305.3(RUNX1):r.504_6222",
"histologies": [
{
"name": "acute lymphoblastic B cell leukaemia",
"numSamples": 169
},
{
"name": "acute lymphoblastic leukaemia",
"numSamples": 80
}
],
"sites": [
{
"name": "haematopoietic and lymphoid tissue",
"numSamples": 249
}
],
"pubMedIds": [
7761424,
7780150,
8609706,
8751464,
8982044,
9067587,
9207408,
9226156,
9628428,
10463610,
10774753,
11091202,
12621238,
12661004,
12750722,
15104290,
15642392,
24557455,
26925663
]
}
],
"fusionCatcher": [
{
"genes": {
"first": {
"hgnc": "ETV6",
"isOncogene": true
},
"second": {
"hgnc": "RUNX1",
"isOncogene": true
}
},
"somaticSources": [
"DepMap CCLE",
"Cancer Genome Project",
"ChimerKB 4.0",
"ChimerPub 4.0",
"ChimerSeq 4.0",
"Known",
"Mitelman DB",
"OncoKB",
"TICdb"
]
}
],
"transcripts": [
{
"transcript": "ENST00000396373.4",
"source": "Ensembl",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "ENSG00000139083",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusions": [
{
"transcript": "ENST00000437180.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000437180.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000300305.3",
"bioType": "protein_coding",
"intron": 1,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000300305.3(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000482318.1",
"bioType": "nonsense_mediated_decay",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000482318.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000486278.2",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000486278.2(RUNX1):r.?_-15+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000455571.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000455571.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000475045.2",
"bioType": "protein_coding",
"intron": 11,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000475045.2(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
},
{
"transcript": "ENST00000416754.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000416754.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"
}
],
"isCanonical": true,
"proteinId": "ENSP00000379658.3"
},
{
"transcript": "NM_001987.4",
"source": "RefSeq",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "2120",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusions": [
{
"transcript": "NM_001754.4",
"bioType": "protein_coding",
"intron": 2,
"geneId": "861",
"hgnc": "RUNX1",
"hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"
}
],
"isCanonical": true,
"proteinId": "NP_001978.1"
}
]
}
]
}
| Field | Type | Notes |
| --- | --- | --- |
| transcript | string | transcript ID |
| bioType | string | descriptions of the biotypes from Ensembl |
| exon | int | exon that contained fusion breakpoint |
| intron | int | intron that contained fusion breakpoint |
| geneId | string | gene ID. e.g. ENSG00000116062 |
| hgnc | string | gene symbol. e.g. MSH6 |
| hgvsr | string | HGVS RNA nomenclature |

Gene Fusion Data Sources

To provide more context to our gene fusions, we provide the following gene fusion data sources:

Consequences

When a gene fusion is identified, we add the following Sequence Ontology consequence:

              "consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],

Gene Fusions Section

The geneFusions section is contained within the object of the originating transcript. It will contain all the pairwise gene fusions that obey the criteria outlined above. In the case of ENST00000396373.4, there are 7 other Ensembl transcripts that would produce a gene fusion. For NM_001987.4, there was only one transcript (NM_001754.4) that would produce a gene fusion.

For each originating transcript, we report the following for each partner transcript:

  • transcript ID
  • gene ID
  • HGNC gene symbol
  • transcript bio type (e.g. protein_coding)
  • intron or exon number containing the breakpoint
  • HGVS RNA notation
tip

Before Nirvana 3.15, we provided HGVS coding notation. However, HGVS r. notation is more appropriate for these types of fusion splicing events (see HGVS SVD-WG007).

          "geneFusions": [
{
"transcript": "NM_001754.4",
"bioType": "protein_coding",
"intron": 2,
"geneId": "861",
"hgnc": "RUNX1",
"hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"
}
],

The HGVS RNA notation above indicates that the gene fusion starts with NM_001754.4 (RUNX1) until CDS position 58 and continues with NM_001987.4 (ETV6). 58+274 indicates that the RUNX1 breakpoint lies 274 bp into intron 2, and 1009+3367 indicates that the ETV6 breakpoint lies 3367 bp into intron 5.

- - - - \ No newline at end of file diff --git a/3.18/core-functionality/mnv-recomposition/index.html b/3.18/core-functionality/mnv-recomposition/index.html deleted file mode 100644 index 5dcda0ac..00000000 --- a/3.18/core-functionality/mnv-recomposition/index.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - -MNV Recomposition | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

MNV Recomposition

Overview

Most annotation tools handle variants independently. The problem with this approach is that nearby variants could affect the same codon, leading to a very different annotation. For instance, consider the following example (Danecek, 2017):

When handled independently, the two variants (C→T & G→A) would be annotated as missense annotations. However, if we consider them together, the resulting MNV would yield a stop gain.

By default, Nirvana identifies these types of cases where two or more SNVs would affect the same codon. In addition, it's able to perform this operation on VCFs containing large numbers of samples (we've tested this on 2,500+ samples using the 1000 Genomes Project VCF files).

Publication

Petr Danecek, Shane A McCarthy, BCFtools/csq: haplotype-aware variant consequences, Bioinformatics, Volume 33, Issue 13, 1 July 2017, Pages 2037–2039

Supported variant types

At the moment, Nirvana only supports recomposing multiple SNVs into an MNV. The Danecek paper makes a compelling case for supporting frameshifting variants paired with frame-restoring variants. We've also received requests for supporting the recomposition of an SNV with insertions and deletions. While this is something we've looked into, it represents functionality that many of our clinical customers are not yet comfortable with.

Criteria

Nirvana will recompose a set of SNVs if two or more SNVs are located in the same codon for any codon in any of the overlapping transcripts.

The following criteria must also be met for at least one sample:

  1. Genotypes are provided for the VCF variants and all variants are in phase or homozygous variant.
  2. All the available phase set IDs are the same (homozygous variants are available to all phase sets)
  3. The genotype ploidy for all the variants is the same.
  4. No unsupported variant type (i.e. insertion or deletion) overlaps the recomposed variants
  5. The first and last base in at least one of the recomposed alleles must be non-reference.

Examples

During variant recomposition, if two SNVs affect the same codon, it becomes the seed codon. If there are SNVs in the adjacent codons, they will be aggregated into the seed codon.

  • Three SNVs in two adjacent codons. The recomposed alternate allele is ATAG: -

  • Three SNVs in two adjacent codons (larger distance). The recomposed alternate allele is ATATCC: -

  • Nirvana can use multiple reading frames to aggregate the seed codon. In this example, the seed codon is highlighted in green. If we look at reading frame 1, we see that the T→A variant occurs in the ACT codon. The adjacent codon to the left also has a variant C→T. As a result, there can be up to four bases between SNVs when aggregating the flanking codons. The recomposed alternate allele is TTCACATAGCACTCAC: -

  • Nothing will be recomposed if there's no seed codon: -

Multiple Samples

Recomposing variants while handling multiple samples can be complex. The recomposition criteria described above often lead to sample-specific recomposed variants. Here we show the recomposition of three variants with sample-specific criteria marked in bold:

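| | POS | REF | ALT | Sample 1 | Sample 2 | Sample 3 |
| --- | --- | --- | --- | --- | --- | --- |
| Decomposed Variant 1 | 100 | A | C | 0\|1 | 0\|1 | 1\|1 |
| Decomposed Variant 2 | 101 | C | G | 0/1 | 1\|1 | 0\|0 |
| Decomposed Variant 3 | 102 | T | A | 1\|1 | . | 0\|1 |
| Recomposed Variant 1 | 100 | AC | AG, CG | . | 1\|2 | . |
| Recomposed Variant 2 | 100 | ACT | CCT, CCA | . | . | 1\|2 |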

In the example above, the heterozygous genotype in sample 1 at position 101 would prevent the MNVs from being recomposed. Similarly, the unknown genotype for sample 2 at position 102 would produce a smaller MNV than the one expressed for sample 3.

Phase Sets

Homozygous variants, same phase set

Recomposed phase set becomes . since homozygous variants belong to all phase sets.

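| | POS | REF | ALT | Genotype | Phase Set |
| --- | --- | --- | --- | --- | --- |
| Decomposed Variant 1 | 100 | A | T | 1\|1 | 567 |
| Decomposed Variant 2 | 101 | C | G | 1\|1 | 567 |
| Recomposed Variant | 100 | AC | TG | 1\|1 | . |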

Mixing phased and unphased variants

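| | POS | REF | ALT | Genotype | Phase Set |
| --- | --- | --- | --- | --- | --- |
| Decomposed Variant 1 | 100 | A | T | 0\|1 | 567 |
| Decomposed Variant 2 | 101 | C | G | 1/1 | . |
| Recomposed Variant | 100 | AC | AG,TG | 1\|2 | 567 |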

Variants in different phase sets

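| | POS | REF | ALT | Genotype | Phase Set |
| --- | --- | --- | --- | --- | --- |
| Decomposed Variant 1 | 100 | A | T | 0\|1 | 567 |
| Decomposed Variant 2 | 101 | C | G | 1\|1 | 890 |
| Recomposed Variant | 100 | AC | AG,TG | 1\|2 | . |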

Unphased homozygous variants

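| | POS | REF | ALT | Genotype | Phase Set |
| --- | --- | --- | --- | --- | --- |
| Decomposed Variant 1 | 100 | A | T | 1/1 | . |
| Decomposed Variant 2 | 101 | C | G | 1/1 | . |
| Recomposed Variant | 100 | AC | TG | 1/1 | . |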

Homozygous variants are not commutative

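| | POS | REF | ALT | Genotype | Phase Set |
| --- | --- | --- | --- | --- | --- |
| Decomposed Variant 1 | 100 | A | T | 0\|1 | 567 |
| Decomposed Variant 2 | 101 | C | G | 1\|1 | 567 |
| Decomposed Variant 3 | 102 | G | T | 0\|1 | 890 |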

In this example, the homozygous variant at position 101 cannot bridge the gap between the other two variants since there could be a switching error between phase sets 567 & 890. As a result, we have to create two overlapping MNVs:

| | POS | REF | ALT | Genotype | Phase Set |
| --- | --- | --- | --- | --- | --- |
| Recomposed Variant 1 | 100 | AC | AG, TG | 1\|2 | 567 |
| Recomposed Variant 2 | 101 | CG | GG, GT | 1\|2 | 890 |

Conflicting Genotypes

JSON Output

Given the following VCF entries:

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  S1  S2  S3
chr1 12861477 . T C . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477
chr1 12861478 . G A . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477

Each original variant would be annotated as usual. The difference is that both will now have an isDecomposedVariant flag set to true in addition to an entry in the linkedVids field that points to the new MNV:

{
"chromosome":"chr1",
"position":12861477,
"refAllele":"T",
"altAlleles":[
"C"
],
"filters":[
"PASS"
],
"samples":[
{
"genotype":"0/0",
},
{
"genotype":"0/0",
},
{
"genotype":"0|1",
}
],
"variants":[
{
"vid":"1-12861477-T-C",
"chromosome":"chr1",
"begin":12861477,
"end":12861477,
"refAllele":"T",
"altAllele":"C",
"variantType":"SNV",
"isDecomposedVariant":true,
"linkedVids":[
"1-12861477-TG-CA"
],
"hgvsg":"NC_000001.11:g.12861477T>C",
"transcripts":[ ... ]
}
]
},
{
"chromosome":"chr1",
"position":12861478,
"refAllele":"G",
"altAlleles":[
"A"
],
"filters":[
"PASS"
],
"samples":[
{
"genotype":"0/0",
},
{
"genotype":"0/0",
},
{
"genotype":"0|1",
}
],
"variants":[
{
"vid":"1-12861478-G-A",
"chromosome":"chr1",
"begin":12861478,
"end":12861478,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"isDecomposedVariant":true,
"linkedVids":[
"1-12861477-TG-CA"
],
"hgvsg":"NC_000001.11:g.12861478G>A",
"transcripts":[ ... ]
}
]
}

The recomposed variant gets a separate entry where the isRecomposedVariant flag is set to true and the linkedVids field links to the constituent SNVs:

    {
"chromosome": "chr1",
"position": 12861477,
"refAllele": "TG",
"altAlleles": [
"CA"
],
"filters": [
"PASS"
],
"cytogeneticBand": "1p36.21",
"samples": [
{
"genotype": "0|0"
},
{
"genotype": "0|0"
},
{
"genotype": "0|1"
}
],
"variants": [
{
"vid": "1-12861477-TG-CA",
"chromosome": "chr1",
"begin": 12861477,
"end": 12861478,
"refAllele": "TG",
"altAllele": "CA",
"variantType": "MNV",
"isRecomposedVariant": true,
"linkedVids": [
"1-12861477-T-C",
"1-12861478-G-A"
],
"hgvsg": "NC_000001.11:g.12861477_12861478inv",
"transcripts":[ ... ]
]
}
]
},
Recomposed QUAL, FILTER, and GQ

Although the example above does not demonstrate it, Nirvana tries to set the quality score, filter, and genotype quality (GQ) for the recomposed variant. The QUAL score is calculated to be the minimum QUAL score for all the constituent SNVs. The same method is used for the genotype quality (GQ) scores. For the filters field, PASS will be used if all constituent variants passed their filters, otherwise we set it to FilteredVariantsRecomposed.

- - - - \ No newline at end of file diff --git a/3.18/core-functionality/variant-ids/index.html b/3.18/core-functionality/variant-ids/index.html deleted file mode 100644 index 86e70230..00000000 --- a/3.18/core-functionality/variant-ids/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Variant IDs | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Variant IDs

Overview

Many downstream tools use a variant identifier to store annotation results. We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute.

The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. Our VID scheme attempts to fill that gap.

Conventions
  • all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)
  • for a reference variant (i.e. no alt allele), replace the period (.) with the reference base
  • padding bases are used; neither the reference nor the alternate allele can be empty
  • some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base

Small Variants

VCF Examples

chr1    66507   .   T   A   184.45  PASS    .
chr1 66521 . T TATATA 144.53 PASS .
chr1 66572 . GTA G,GTACTATATATTATA 45.45 PASS .

Format

chromosome-position-reference allele-alternate allele

VID Examples

  • 1-66507-T-A
  • 1-66521-T-TATATA
  • 1-66572-GTA-G
  • 1-66572-G-GTACTATATATTA

Translocation Breakends

VCF Example

chr1    2617277 .   A   AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[  .   PASS    SVTYPE=BND

Format

chromosome-position-reference allele-alternate allele

VID Example

  • 1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[

All Other Structural Variants

VCF Examples

chr1    1000    .   G   <ROH>   .   PASS    END=3001000;SVTYPE=ROH
chr1 1350082 . G <DEL> . PASS END=1351320;SVTYPE=DEL
chr1 1477854 . C <DUP:TANDEM> . PASS END=1477984;SVTYPE=DUP
chr1 1477968 . T <INS> . PASS END=1477968;SVTYPE=INS
chr1 1715898 . N <DUP> . PASS SVTYPE=CNV;END=1750149
chr1 2650426 . N <DEL> . PASS SVTYPE=CNV;END=2653074
chr2 321682 . T <INV> . PASS SVTYPE=INV;END=421681
chr20 2633403 . G <STR2> . PASS END=2633421

Format

chromosome-position-end position-reference allele-alternate allele-SVTYPE

VID Examples

  • 1-1000-3001000-G-<ROH>-ROH
  • 1-1350082-1351320-G-<DEL>-DEL
  • 1-1477854-1477984-C-<DUP:TANDEM>-DUP
  • 1-1477968-1477968-T-<INS>-INS
  • 1-1715898-1750149-A-<DUP>-CNV (replace the N with A)
  • 1-2650426-2653074-N-<DEL>-CNV (keep the N)
  • 2-321682-421681-T-<INV>-INV
  • 20-2633403-2633421-G-<STR2>-STR
- - - - \ No newline at end of file diff --git a/3.18/data-sources/1000Genomes-snv-json/index.html b/3.18/data-sources/1000Genomes-snv-json/index.html deleted file mode 100644 index 22b07f00..00000000 --- a/3.18/data-sources/1000Genomes-snv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-snv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

1000Genomes-snv-json

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
Field | Type | Notes
allAf | float | allele frequency for all populations. Range: 0 - 1.0
allAc | int | allele count for all populations. Integer.
allAn | int | allele number for all populations. Non-zero integer.
afrAf | float | allele frequency for the African super population. Range: 0 - 1.0
afrAc | int | allele count for the African super population. Integer.
afrAn | int | allele number for the African super population. Non-zero integer.
amrAf | float | allele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAc | int | allele count for the Ad Mixed American super population. Integer.
amrAn | int | allele number for the Ad Mixed American super population. Non-zero integer.
easAf | float | allele frequency for the East Asian super population. Range: 0 - 1.0
easAc | int | allele count for the East Asian super population. Integer.
easAn | int | allele number for the East Asian super population. Non-zero integer.
eurAf | float | allele frequency for the European super population. Range: 0 - 1.0
eurAc | int | allele count for the European super population. Integer.
eurAn | int | allele number for the European super population. Non-zero integer.
sasAf | float | allele frequency for the South Asian super population. Range: 0 - 1.0
sasAc | int | allele count for the South Asian super population. Integer.
sasAn | int | allele number for the South Asian super population. Non-zero integer.
- - - - \ No newline at end of file diff --git a/3.18/data-sources/1000Genomes-sv-json/index.html b/3.18/data-sources/1000Genomes-sv-json/index.html deleted file mode 100644 index a890447e..00000000 --- a/3.18/data-sources/1000Genomes-sv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-sv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

1000Genomes-sv-json

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
Field | Type | Notes
chromosome | string |
begin | integer |
end | integer |
variantType | string |
id | string |
allAn | integer | allele number for all populations. Non-zero integer.
allAc | integer | allele count for all populations. Integer.
allAf | floating point | allele frequency for all populations. Range: 0 - 1.0
afrAf | floating point | allele frequency for the African super population. Range: 0 - 1.0
amrAf | floating point | allele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAf | floating point | allele frequency for the European super population. Range: 0 - 1.0
easAf | floating point | allele frequency for the East Asian super population. Range: 0 - 1.0
sasAf | floating point | allele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlap | floating point | Range: 0 - 1.
- - - - \ No newline at end of file diff --git a/3.18/data-sources/1000Genomes/index.html b/3.18/data-sources/1000Genomes/index.html deleted file mode 100644 index 96b5812e..00000000 --- a/3.18/data-sources/1000Genomes/index.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - -1000 Genomes | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

1000 Genomes

Overview

The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases.

Publication

Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. Nature 526, 75–81 (2015). https://doi.org/10.1038/nature15394

Populations

Small Variants

VCF File Parsing

The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. Our team converted the original data to VCF entries with allele counts and allele numbers like the following.

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633

The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored).

We parse the VCF file and extract the following fields from INFO:

  • AA
  • AC
  • AN
  • EAS_AN
  • AMR_AN
  • AFR_AN
  • EUR_AN
  • SAS_AN
  • EAS_AC
  • AMR_AC
  • AFR_AC
  • EUR_AC
  • SAS_AC

Conflict Resolution

We have observed conflicting allele frequency information in the source. Take the following example:

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;
1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;

That is, the variant 1-20505705-C-CTG has conflicting entries. To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX.

Chromosome | # of alleles | # of conflicting alleles | percentage
chrX | 834800 | 2733 | 0.33%
Total | 21413098 | 2743 | 0.013%

Currently, we remove the allele frequency of the conflicting allele (i.e., the insertion TG in the example) but keep the allele frequencies of all other alleles in the VCF line.

Potential Alternate Solutions

  • Remove all alleles that are contained in the VCF lines which have a conflicting allele. (Recommended by 1000 genome group Holly Zheng-Bradley, 7/29/2015)
  • Recalculate the allele frequency for the conflicting allele.
  • Pick the allele frequency that has the highest data support.

Download URL

GRCh37 -GRCh38

JSON Output

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
Field | Type | Notes
allAf | float | allele frequency for all populations. Range: 0 - 1.0
allAc | int | allele count for all populations. Integer.
allAn | int | allele number for all populations. Non-zero integer.
afrAf | float | allele frequency for the African super population. Range: 0 - 1.0
afrAc | int | allele count for the African super population. Integer.
afrAn | int | allele number for the African super population. Non-zero integer.
amrAf | float | allele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAc | int | allele count for the Ad Mixed American super population. Integer.
amrAn | int | allele number for the Ad Mixed American super population. Non-zero integer.
easAf | float | allele frequency for the East Asian super population. Range: 0 - 1.0
easAc | int | allele count for the East Asian super population. Integer.
easAn | int | allele number for the East Asian super population. Non-zero integer.
eurAf | float | allele frequency for the European super population. Range: 0 - 1.0
eurAc | int | allele count for the European super population. Integer.
eurAn | int | allele number for the European super population. Non-zero integer.
sasAf | float | allele frequency for the South Asian super population. Range: 0 - 1.0
sasAc | int | allele count for the South Asian super population. Integer.
sasAn | int | allele number for the South Asian super population. Non-zero integer.

Structural Variants

VCF File Parsing

The VCF files contain entries like the following:

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A <CN0>,<CN2>,<CN3>,<CN4> 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4

Please note that CNVs are allele-specific. For example, HG00096 is effectively copy number 4, which would be a net gain on chr22.

1000 Genomes contains 5 types of structural variants:

  • CNV
  • DEL
  • DUP
  • INS
  • INV

Since the 1000 Genomes data is provided in VCF format, we assume that the coordinates follow the VCF convention, i.e., there is a padding base for symbolic alleles, so every interval can be interpreted as [BEGIN+1, END]. For all variant types except insertions, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below.

Insertion issues

  • END = BEGIN for 6/165
  • END = BEGIN+2 for 93/165
  • END = BEGIN+3 for 11/165
  • END = BEGIN+4 for 11/165
  • END – BEGIN ranges from 5 to 1156 for the others.

Converting VCF svTypes to SO sequence alterations

The svType will be captured in our JSON file under the sequenceAlteration key. Here's the translation we'll use according to svType in 1000 Genomes.

svType | Alternative Alleles contain <CN*> | sequenceAlteration
ALU | FALSE | mobile_element_insertion
DUP | TRUE | copy_number_gain
CNV | TRUE | copy_number_gain (observed_gains > 0 and observed_losses = 0); copy_number_loss (observed_gains = 0 and observed_losses > 0); copy_number_variation (otherwise)
DEL | TRUE | copy_number_loss
LINE1 | FALSE | mobile_element_insertion
SVA | FALSE | mobile_element_insertion
INV | FALSE | inversion
INS | FALSE | insertion

Exceptions

We discard structural variants without END

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
21 9495848 esv3646347 A <INS:ME:LINE1> 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0

CNVs in chrY

  • No other types of structural variants exist in chrY
  • Since copy number is provided in genotype field, we directly parse the copy number from "CN" field.
  • For most CNVs in chrY, the reference copy number is 1, but the reference copy number for CNVs in segmental duplication sites is 2 (<CN2> in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00101 HG00103 HG00105 HG00107 HG00108
Y 2888555 CNV_Y_2888555_3014661 T <CN2> 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394
Y 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C <CN1>,<CN3> 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99

JSON Output

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
Field | Type | Notes
chromosome | string |
begin | integer |
end | integer |
variantType | string |
id | string |
allAn | integer | allele number for all populations. Non-zero integer.
allAc | integer | allele count for all populations. Integer.
allAf | floating point | allele frequency for all populations. Range: 0 - 1.0
afrAf | floating point | allele frequency for the African super population. Range: 0 - 1.0
amrAf | floating point | allele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAf | floating point | allele frequency for the European super population. Range: 0 - 1.0
easAf | floating point | allele frequency for the East Asian super population. Range: 0 - 1.0
sasAf | floating point | allele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlap | floating point | Range: 0 - 1.
- - - - \ No newline at end of file diff --git a/3.18/data-sources/amino-acid-conservation-json/index.html b/3.18/data-sources/amino-acid-conservation-json/index.html deleted file mode 100644 index ca8b8e04..00000000 --- a/3.18/data-sources/amino-acid-conservation-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -amino-acid-conservation-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

amino-acid-conservation-json

"aminoAcidConservation": {
"scores": [0.34]
}
Field | Type | Notes
aminoAcidConservation | object |
scores | object array of doubles | percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00
- - - - \ No newline at end of file diff --git a/3.18/data-sources/amino-acid-conservation/index.html b/3.18/data-sources/amino-acid-conservation/index.html deleted file mode 100644 index cfbb1602..00000000 --- a/3.18/data-sources/amino-acid-conservation/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -Amino Acid Conservation | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Amino Acid Conservation

Overview

Amino acid conservation scores are obtained from multiple alignments of vertebrate exomes to the human exome. The scores indicate the frequency with which a particular AA is observed in Humans.

Publication

Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. Genome Res. 2005 Aug;15(8):1034-50. (http://www.genome.org/cgi/doi/10.1101/gr.3715005)

FASTA File

The exon alignments are provided in FASTA files as follows:

>ENST00000641515.2_hg38_1_2 3 0 0 chr1:65565-65573+
MKK
>ENST00000641515.2_panTro4_1_2 3 0 0 chrUn_GL393541:146907-146915+
MKK
>ENST00000641515.2_gorGor3_1_2 3 0 0
---
>ENST00000641515.2_ponAbe2_1_2 3 0 0 chr15:99141417-99141425-
MKK
>ENST00000641515.2_hg38_2_2 324 0 0 chr1:69037-70008+
VTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ
>ENST00000641515.2_panTro4_2_2 324 0 0 chrUn_GL393541:151333-152303+

Parsing FASTA

For each Ensembl transcript, we will need to aggregate all the exons together for each of the 100 species. From there, we should get a full alignment that can be used to determine conservation. For example, for ENST00000641515.2 we have:

Human (hg38) MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Chimp MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFL-MLFFVFYGGIVFGNLLIVRIVVSDSHLHSPMYFLLANLSLIDLSLCSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Gorilla ----------------------------------------------------------------------------------------------------------------------
Orangutan MKKVTAEAISWNESTSKTNNSVVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVIIVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Gibbon ----------------------------------------------------------------------------------------------------------------------
Rhesus MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVVDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL
Macaque MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVIDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL

If we look at position 6, we see that humans have an Alanine (A) residue. This residue is shared by Chimp and Orangutan. However, Rhesus and Macaque have a Glutamic acid (E) residue at that position. Moreover, Gorilla and Gibbon don't even have data for that transcript. For position 6, we would say that we have 43% conservation (3/7) since three of the seven organisms (human included) carry the human residue.

Assigning scores to Nirvana transcripts

The source FASTA file comes with Ensembl/UCSC transcript ids of the transcripts used for alignments. The Nirvana cache has RefSeq and Ensembl transcripts and our first attempt was to map the given Ensembl/UCSC ids to their equivalent RefSeq/Ensembl ids. This attempt was unsuccessful since UCSC Table Browser provided mapping without version numbers. So we proceeded as follows:

  • Take proteins which have a unique mapping (and hence one set of conservation scores). For ones that mapped to both ChrX and ChrY, we accepted the one from ChrX.
  • A Nirvana transcript having an exact peptide sequence match with a uniquely aligned protein is assigned the corresponding conservation scores.

Unfortunately this left us with a very small number of transcripts having conservation scores.

GRCh37

  • Source FASTA contained 41957 protein alignments.
  • 38165 proteins had unique scores.
  • 88 aligned proteins existed in Nirvana cache.
  • 118 transcripts had conservation scores.

GRCh38

  • Source FASTA contained 110024 protein alignments.
  • 88961 proteins had unique scores.
  • 11688 aligned proteins existed in Nirvana cache.
  • 12098 transcripts had conservation scores.

Download URL

GRCh37: http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz

GRCh38: http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz

JSON Output

Conservation scores are reported in the transcript section. One score is reported for each alt allele

"aminoAcidConservation": {
"scores": [0.34]
}
Field | Type | Notes
aminoAcidConservation | object |
scores | object array of doubles | percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00
- - - - \ No newline at end of file diff --git a/3.18/data-sources/clingen-dosage-json/index.html b/3.18/data-sources/clingen-dosage-json/index.html deleted file mode 100644 index da7c268e..00000000 --- a/3.18/data-sources/clingen-dosage-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-dosage-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

clingen-dosage-json

"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
Field | Type | Notes
clingenDosageSensitivityMap | object array |
chromosome | string | Ensembl-style chromosome names
begin | integer | 1-based position
end | integer | 1-based position
haploinsufficiency | string | see possible values below
triplosensitivity | string | (same as haploinsufficiency)
reciprocalOverlap | floating point | Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).
annotationOverlap | floating point | Range: 0 - 1. E.g. 0.57 would indicate that 57% of the annotated region is overlapped by the variant. Specified up to 5 decimal places (Not reported for Insertions).

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely
- - - - \ No newline at end of file diff --git a/3.18/data-sources/clingen-gene-validity-json/index.html b/3.18/data-sources/clingen-gene-validity-json/index.html deleted file mode 100644 index d4cba17c..00000000 --- a/3.18/data-sources/clingen-gene-validity-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-gene-validity-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

clingen-gene-validity-json

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
Field | Type | Notes
clingenGeneValidity | object |
diseaseId | string | Monarch Disease Ontology ID (MONDO)
disease | string | disease label
classification | string | see below for possible values
classificationDate | string | yyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
  • no known disease relationship
- - - - \ No newline at end of file diff --git a/3.18/data-sources/clingen-json/index.html b/3.18/data-sources/clingen-json/index.html deleted file mode 100644 index 381a9ce1..00000000 --- a/3.18/data-sources/clingen-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

clingen-json

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
Field | Type | Notes
clingen | object array |
chromosome | string | Ensembl-style chromosome names
begin | integer | 1-based position
end | integer | 1-based position
variantType | string | Any of the sequence alterations defined here.
id | string | Identifier from the data source. Alternatively a VID
clinicalInterpretation | string | see possible values below
observedGains | integer | Range: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
observedLosses | integer | Range: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
validated | boolean |
phenotypes | string array | Description of the phenotype.
phenotypeIds | string array | Description of the phenotype IDs.
reciprocalOverlap | floating point | Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain
- - - - \ No newline at end of file diff --git a/3.18/data-sources/clingen/index.html b/3.18/data-sources/clingen/index.html deleted file mode 100644 index 6297099e..00000000 --- a/3.18/data-sources/clingen/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -ClinGen | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

ClinGen

Overview

ClinGen is a National Institutes of Health (NIH)-funded resource dedicated to building a central resource that defines the clinical relevance of genes and variants for use in precision medicine and research.

Publication

Heidi L. Rehm, Ph.D., Jonathan S. Berg, M.D., Ph.D., Lisa D. Brooks, Ph.D., Carlos D. Bustamante, Ph.D., James P. Evans, M.D., Ph.D., Melissa J. Landrum, Ph.D., David H. Ledbetter, Ph.D., Donna R. Maglott, Ph.D., Christa Lese Martin, Ph.D., Robert L. Nussbaum, M.D., Sharon E. Plon, M.D., Ph.D., Erin M. Ramos, Ph.D., Stephen T. Sherry, Ph.D., and Michael S. Watson, Ph.D., for ClinGen. ClinGen The Clinical Genome Resource. N Engl J Med 2015; 372:2235-2242 June 4, 2015 DOI: 10.1056/NEJMsr1406261.

ISCA Regions

TSV Extraction

ClinGen contains only copy number variants. Since the coordinates in the original ClinGen file follow the BED convention (0-based start), they had to be adjusted to [BEGIN+1, END].

#bin    chrom   chromStart      chromEnd        name    score   strand  thickStart      thickEnd        attrCount       attrTags        attrVals
nsv530705 1 564405 8597804 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv530706 1 564424 3262790 0 1 copy_number_loss pathogenic False Abnormal facial shape,Abnormality of cardiac morphology,Global developmental delay,Muscular hypotonia HP:0001252,HP:0001263,HP:0001627,HP:0001999,MedGen:CN001147,MedGen:CN001157,MedGen:CN001482,MedGen:CN001810
nsv530707 1 564424 7068738 0 1 copy_number_loss pathogenic False Abnormality of cardiac morphology,Cleft upper lip,Failure to thrive,Global developmental delay,Intrauterine growth retardation,Microcephaly,Short stature HP:0000204,HP:0000252,HP:0001263,HP:0001508,HP:0001511,HP:0001627,HP:0004322,MedGen:C0349588,MedGen:C1845868,MedGen:C1853481,MedGen:C2364119,MedGen:CN000197,MedGen:CN001157,MedGen:CN001482
nsv533512 1 564435 649748 0 1 copy_number_loss benign False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv931338 1 714078 4958499 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv530300 1 728138 5066371 1 0 copy_number_gain pathogenic False Abnormality of cardiac morphology,Cleft palate,Global developmental delay HP:0000175,HP:0001263,HP:0001627,MedGen:C2240378,MedGen:CN001157,MedGen:CN001482

Status levels

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain

Parsing

We parse the ClinGen tsv file and extract the following:

  • chrom
  • chromStart (note this is a 0-based coordinate)
  • chromEnd
  • attrTags
  • attrVals

attrTags and attrVals are comma separated lists. attrTags contains the field keys and attrVals contains the field values. We will parse the following keys from the two fields:

  • parent (this will be used as the ID in our JSON output)
  • clinical_int
  • validated
  • phenotype (this should be a string array)
  • phenotype_id (this should be a string array)

Observed losses and observed gains will be calculated from entries that share a common parent ID.

  • variants with a common parent ID and same coordinates are grouped
    • calculated observed losses, observed gains for each group
    • Clinical significance and validation status are collapsed using the priority strategy described below
  • Variants with the same parent ID can have different coordinates (mapped to hg38)
    • nsv491508 : chr14:105583663-106881350 and chr14:105605043-106766076 (only one example)
    • we kept both variants

Conflict Resolution

Clinical significance priority

When there is a mixture of variants belonging to the same parent ID, we will choose the most pathogenic clinical significance from the available values, e.g. if 3 samples were deemed pathogenic and 2 samples were likely pathogenic, we would list the variant as pathogenic.

Priority (high to low)

  • Priority
  • Pathogenic
  • Likely pathogenic
  • Benign
  • Likely benign
  • Uncertain significance

Validation Priority

When there is a mixture of variants belonging to the same parent ID, we will set the validation status to true if any of the variants were validated.

Download URL

https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite

JSON Output

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
FieldTypeNotes
clingenobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
variantTypestringAny of the sequence alterations defined here.
idstringIdentifier from the data source. Alternatively a VID
clinicalInterpretationstringsee possible values below
observedGainsintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
observedLossesintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
validatedboolean
phenotypesstring arrayDescription of the phenotype.
phenotypeIdsstring arrayDescription of the phenotype IDs.
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain

Dosage Sensitivity Map

The Clinical Genome Resource (ClinGen) consortium is curating genes and regions of the genome to assess whether there is evidence to support that these genes/regions are dosage sensitive and should be targeted on a cytogenomic array. Nirvana reports these annotations for overlapping SVs.

Publication

Riggs ER, Nelson T, Merz A, Ackley T, Bunke B, Collins CD, Collinson MN, Fan YS, Goodenberger ML, Golden DM, Haglund-Hazy L, Krgovic D, Lamb AN, Lewis Z, Li G, Liu Y, Meck J, Neufeld-Kaiser W, Runke CK, Sanmann JN, Stavropoulos DJ, Strong E, Su M, Tayeh MK, Kokalj Vokac N, Thorland EC, Andersen E, Martin CL. Copy number variant discrepancy resolution using the ClinGen dosage sensitivity map results in updated clinical interpretations in ClinVar. Hum Mutat. 2018 Nov;39(11):1650-1659. doi: 10.1002/humu.23610. PMID: 30095202; PMCID: PMC7374944.

TSV Source files

Regions

#ClinGen Region Curation Results
#07 May,2019
#Genomic Locations are reported on GRCh38 (hg38): GCF_000001405.36
#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen
#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_region.cgi?id=key
#ISCA ID ISCA Region Name cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID
ISCA-46299 Xp11.22 region (includes HUWE1) Xp11.22 tbd 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 22840365 20655035 26692240 2018-11-19
ISCA-46295 15q13.3 recurrent region (D-CHRNA7 to BP5) (includes CHRNA7 and OTUD7A) 15q13.3 chr15:31727418-32153204 3 Sufficient evidence for dosage pathogenicity 19898479 20236110 22775350 40 Dosage sensitivity unlikely 26968334 22420048 2018-05-10
ISCA-46291 7q11.23 recurrent distal region (includes HIP1, YWHAG) 7q11.23 chr7:75528718-76433859 2 Some evidence for dosage pathogenicity 21109226 16971481 1 Little evidence for dosage pathogenicity 21109226 27867344 2018-12-31
ISCA-46290 Xp11.22p11.23 recurrent region (includes SHROOM4) Xp11.22-p11.23 chrX: 48447780-52444264 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 19716111 21418194 25425167 2017-12-14 300801

Genes

#ClinGen Gene Curation Results
#24 May,2019
#Genomic Locations are reported on GRCh37 (hg19): GCF_000001405.13
#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen
#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_gene.cgi?sym=Gene Symbol
#Gene Symbol Gene ID cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID
A4GALT 53947 22q13.2 chr22:43088121-43117307 30 Gene associated with autosomal recessive phenotype 0 No evidence available 2014-12-11 111400
AAGAB 79719 15q23 chr15:67493013-67547536 3 Sufficient evidence for dosage pathogenicity 23064416 23000146 0 No evidence available 2013-02-28 148600

Dosage Rating System

RatingPossible Clinical Interpretation
0No evidence to suggest that dosage sensitivity is associated with clinical phenotype
1Little evidence suggesting dosage sensitivity is associated with clinical phenotype
2Emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
3Sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
30Gene associated with autosomal recessive phenotype
40Dosage sensitivity unlikely

Reference: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml

Download URL

ftp://ftp.clinicalgenome.org/

JSON Output

"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
FieldTypeNotes
clingenDosageSensitivityMapobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
haploinsufficiencystringsee possible values below
triplosensitivitystring(same as haploinsufficiency) 
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).
annotationOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap. Specified up to 5 decimal places (Not reported for Insertions).

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely

Building the supplementary files

The gene dosage sensitivity .nga for Nirvana can be built using the SAUtils command's DosageSensitivity subcommand. The required data file is ClinGen_gene_curation_list_{ASSEMBLY}.tsv (url provided above) and its associated .version file.

NAME=ClinGen Dosage Sensitivity Map
VERSION=20211201
DATE=2021-12-01
DESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)

Here is a sample run:

dotnet Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll DosageSensitivity --out SupplementaryDatabase/64/GRCh37 --tsv ClinGen_gene_curation_list_GRCh37.tsv
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0
---------------------------------------------------------------------------


Time: 00:00:00.1

For building the .nsi files, we use the SAUtils command's DosageMapRegions subcommand. The required data file is ClinGen_region_curation_list_{ASSEMBLY}.tsv (url provided above) and its associated .version file.

NAME=ClinGen Dosage Sensitivity Map
VERSION=20211201
DATE=2021-12-01
DESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)

Here is a sample run:

dotnet Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll DosageMapRegions --out SupplementaryDatabase/64/GRCh37 --ref References/7/Homo_sapiens.GRCh37.Nirvana.dat --tsv ClinGen_region_curation_list_GRCh37.tsv
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0
---------------------------------------------------------------------------

Writing 505 intervals to database...

Time: 00:00:00.1

Gene-Disease Validity

The ClinGen Gene-Disease Clinical Validity curation process involves evaluating the strength of evidence supporting or refuting a claim that variation in a particular gene causes a particular disease. Nirvana reports these annotations for genes in the genes section of the JSON.

Publication

Strande NT, Riggs ER, Buchanan AH, et al. Evaluating the Clinical Validity of Gene-Disease Associations: An Evidence-Based Framework Developed by the Clinical Genome Resource. Am J Hum Genet. 2017;100(6):895-906. doi:10.1016/j.ajhg.2017.04.015

Source TSV

The source data comes in a CSV file that we convert to a TSV.

CLINGEN GENE VALIDITY CURATIONS
FILE CREATED: 2019-05-28
WEBPAGE: https://search.clinicalgenome.org/kb/gene-validity
+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++
GENE SYMBOL,GENE ID (HGNC),DISEASE LABEL,DISEASE ID (MONDO),SOP,CLASSIFICATION,ONLINE REPORT,CLASSIFICATION DATE
+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++
A2ML1,HGNC:23336,Noonan syndrome with multiple lentigines,MONDO_0007893,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/59b87033-dd91-4f1e-aec1-c9b1f5124b16--2018-06-07T14:37:47,2018-06-07T14:37:47.175Z
A2ML1,HGNC:23336,cardiofaciocutaneous syndrome,MONDO_0015280,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/fc3c41d8-8497-489b-a350-c9e30016bc6a--2018-06-07T14:31:03,2018-06-07T14:31:03.696Z
A2ML1,HGNC:23336,Costello syndrome,MONDO_0009026,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/ea72ba8d-cf62-44bc-86be-da64e3848eba--2018-06-07T14:34:05,2018-06-07T14:34:05.324Z

Download URL

https://search.clinicalgenome.org/kb/downloads#section_gene-disease-validity

Conflict Resolution

Multiple Classifications

Here is an example of multiple classifications.

$ grep MONDO_0010192 ClinGen-Gene-Disease-Summary-2019-12-02.csv  | grep EDNRB
EDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Moderate,https://search.clinicalgenome.org/kb/gene-validity/d7abbd45-7915-437b-849b-dea876bfc2f5--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z
EDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Limited,https://search.clinicalgenome.org/kb/gene-validity/73ee9727-60c1-40fd-830f-08c2b513d2ee--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z

In such cases, we select the more severe classification.

Multiple Dates

$ grep MONDO_0016419 ClinGen-Gene-Disease-Summary-2019-12-02.csv  | grep MUTYH
MUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9904,2017-05-24T00:00:00
MUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9902,2017-05-25T00:00:00

If the classifications are the same, we should select the latest classification date.

JSON Output

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
FieldTypeNotes
clingenGeneValidityobject array
diseaseIdstringMonarch Disease Ontology ID (MONDO)
diseasestringdisease label
classificationstringsee below for possible values
classificationDatestringyyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
  • no known disease relationship

Building the supplementary files

The gene disease validity .nga for Nirvana can be built using the SAUtils command's DiseaseValidity subcommand. The only required data file is Clingen-Gene-Disease-Summary-2021-12-01.tsv (url provided above) and its associated .version file.

NAME=ClinGen disease validity curations
VERSION=20211201
DATE=2021-12-01
DESCRIPTION=Disease validity curations from ClinGen (dbVar)

Here is a sample run:

dotnet Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll DiseaseValidity --tsv Clingen-Gene-Disease-Summary-2021-12-01.tsv \\
--uga Cache/27/UGA.tsv.gz --out SupplementaryDatabase/64/GRCh37
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0
---------------------------------------------------------------------------

Number of geneIds missing from the cache:0 (0%)

Time: 00:00:00.2
- - - - \ No newline at end of file diff --git a/3.18/data-sources/clinvar-json/index.html b/3.18/data-sources/clinvar-json/index.html deleted file mode 100644 index bf8c6ef4..00000000 --- a/3.18/data-sources/clinvar-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clinvar-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

clinvar-json

small variants:

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]

large variants:

"clinvar":[
{
"chromosome":"1",
"begin":629025,
"end":8537745,
"variantType":"copy_number_loss",
"id":"RCV000051993.4",
"variationId":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"alleleOrigins":[
"not provided"
],
"phenotypes":[
"See cases"
],
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21",
"pubMedIds":[
"21844811"
]
},
{
"id":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21"
},
......
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
variantTypestringvariant type
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity
- - - - \ No newline at end of file diff --git a/3.18/data-sources/clinvar/index.html b/3.18/data-sources/clinvar/index.html deleted file mode 100644 index 3bc10a48..00000000 --- a/3.18/data-sources/clinvar/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -ClinVar | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

ClinVar

Overview

ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation.

Publication

Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, Nucleic Acids Research, 46, Issue D1, 4 January 2018, Pages D1062–D1067, https://doi.org/10.1093/nar/gkx1153

RCV File

Example

Here's a full RCV entry.

Parsing

In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output.

ID

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinVarAccession Acc="RCV000000001" Version="2">
</ClinVarSet>

The Acc and Version fields are merged to form the ID (RCV000000001.2)

LastUpdatedDate

<ClinVarSet>
<ReferenceClinVarAssertion DateCreated="2012-08-13" DateLastUpdated="2016-02-17" ID="57604" >
</ClinVarSet>

Significance

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

ReviewStatus

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

Phenotypes

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="62">
<Trait Type="Disease">
<Name>
<ElementValue Type="Preferred">Joubert syndrome 9</ElementValue>
</Name>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

We only use the field with Type="Preferred". Multiple phenotypes may be reported.

Location, Variant Type and Variant Id

<ReferenceClinVarAssertion>
<GenotypeSet Type="CompoundHeterozygote" ID="424709">
<MeasureSet Type="Variant" ID="81">
<Measure Type="single nucleotide variant" ID="15120">
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38"
AssemblyStatus="current" Chr="10" Accession="NC_000010.11" start="89222510"
stop="89222510" display_start="89222510" display_stop="89222510" variantLength="1"
positionVCF="89222510" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25"
AssemblyStatus="previous" Chr="10" Accession="NC_000010.10" start="90982267"
stop="90982267" display_start="90982267" display_stop="90982267" variantLength="1"
positionVCF="90982267" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
</Measure>
</MeasureSet>
</GenotypeSet>
</ReferenceClinVarAssertion>
  • The variant position is extracted from the fields for their respective assemblies.
  • Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant.
  • For older records, since the "start" and "stop" fields are not always available, we use the "display_start" and "display_stop" fields.
  • If a required allele is not available, we extract it from the reference sequence.
  • Only variants having a dbSNP id are extracted.
  • Note that a ClinVar accession may have multiple variants associated with it (possibly in different locations)
  • VariantId is extracted from the MeasureSet attributes.
  • VariantType is extracted from the Measure attributes.
    unsupported variant types

    We currently don't support the following variant types:

    • Microsatellite
    • protein only
    • fusion
    • Complex
    • Variation
    • Translocation

MedGen, OMIM, Orphanet IDs

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="175">
<Trait ID="3036" Type="Disease">
<XRef ID="C0086651" DB="MedGen"/>
<XRef ID="309297" DB="Orphanet"/>
<XRef ID="582" DB="Orphanet"/>
<XRef Type="MIM" ID="253000" DB="OMIM"/>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

AlleleOrigins

<ClinVarAssertion>
<Origin>germline</Origin>
</ClinVarAssertion>

We extract all Allele Origins, but only from Submission (SCV) entries.

PubMedIds

<ClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<Citation Type="general">
<ID Source="PubMed">12114475</ID>
</Citation>
</ClinicalSignificance>
<AttributeSet>
<Attribute Type="AssertionMethod">LMM Criteria</Attribute>
<Citation>
<ID Source="PubMed">24033266</ID>
</Citation>
</AttributeSet>
<ObservedIn>
<ObservedData ID="9727445">
<Citation Type="general">
<ID Source="PubMed">9113933</ID>
</Citation>
</ObservedData>
</ObservedIn>
<Citation Type="general">
<ID Source="PubMed">23757202</ID>
</Citation>
</ClinVarAssertion>

We extract all PubMed IDs, but only from Submission (SCV) entries.

Parsing Significance

Extracting significance(s) may involve parsing multiple fields. Take the following snippets into consideration.

<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2016-10-13">
<ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus>
<Description>Pathogenic/Likely pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2012-06-07">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Conflicting interpretations of pathogenicity</Description>
<Explanation DataSource="ClinVar" Type="public">Pathogenic(1);Uncertain significance(1)</Explanation>
</ClinicalSignificance>

Given the evidence, we converted the significance field into an array of strings which may be parsed out of the Description or Explanation fields.

Varying Delimiters

The delimiters in each field may vary. Currently, the delimiters for Description are , and /. The delimiters for Explanation are ; and /.

VCV File

Example

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<ClinVarVariationRelease xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd" ReleaseDate="2019-12-31">
<VariationArchive VariationID="431749" VariationName="GRCh37/hg19 1p36.31(chr1:6051187-6158763)" VariationType="copy number gain" DateCreated="2017-08-12" DateLastUpdated="2019-09-10" Accession="VCV000431749" Version="1" RecordType="included" NumberOfSubmissions="0" NumberOfSubmitters="0">
<RecordStatus>current</RecordStatus>
<Species>Homo sapiens</Species>
<IncludedRecord>
<SimpleAllele AlleleID="425239" VariationID="431749">
<GeneList>
<Gene Symbol="KCNAB2" FullName="potassium voltage-gated channel subfamily A regulatory beta subunit 2" GeneID="8514" HGNC_ID="HGNC:6229" Source="calculated" RelationshipType="genes overlapped by variant">
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="1" Accession="NC_000001.11" start="5992639" stop="6101186" display_start="5992639" display_stop="6101186" Strand="+"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="6052357" stop="6161252" display_start="6052357" display_stop="6161252" Strand="+"/>
</Location>
<OMIM>601142</OMIM>
</Gene>
<Gene Symbol="NPHP4" FullName="nephrocystin 4" GeneID="261734" HGNC_ID="HGNC:19104" Source="calculated" RelationshipType="genes overlapped by variant">
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="1" Accession="NC_000001.11" start="5862810" stop="5992425" display_start="5862810" display_stop="5992425" Strand="-"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="5922869" stop="6052532" display_start="5922869" display_stop="6052532" Strand="-"/>
</Location>
<OMIM>607215</OMIM>
</Gene>
</GeneList>
<Name>GRCh37/hg19 1p36.31(chr1:6051187-6158763)</Name>
<VariantType>copy number gain</VariantType>
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" forDisplay="true" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="6051187" stop="6158763" display_start="6051187" display_stop="6158763"/> </Location>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
<XRefList>
<XRef Type="Interpreted" ID="431733" DB="ClinVar"/>
</XRefList>
</SimpleAllele>
<ReviewStatus>no interpretation for the single variant</ReviewStatus>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
<SubmittedInterpretationList>
<SCV Title="SUB1895145" Accession="SCV000296057" Version="1"/>
</SubmittedInterpretationList>
<InterpretedVariationList>
<InterpretedVariation VariationID="431733" Accession="VCV000431733" Version="1"/>
</InterpretedVariationList>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

Parsing

In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output.

id

<VariationArchive VariationID="431749" VariationName="GRCh37/hg19 1p36.31(chr1:6051187-6158763)" VariationType="copy number gain" DateCreated="2017-08-12" DateLastUpdated="2019-09-10" Accession="VCV000431749" Version="1" RecordType="included" NumberOfSubmissions="0" NumberOfSubmitters="0">

The Accession and Version fields are merged to form the ID (VCV000431749.1)

significance

<ClinVarVariationRelease>
<VariationArchive>
<IncludedRecord>
<SimpleAllele>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
</SimpleAllele>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

May have multiple significances listed.

reviewStatus

<ClinVarVariationRelease>
<VariationArchive>
<IncludedRecord>
<ReviewStatus>no interpretation for the single variant</ReviewStatus>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

Known Issues

Known Issues
  • The XML file contains ~1k more entries (out of 162K) than the VCF file
  • The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF
  • The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H", etc.) as their alternate allele

Download URLs

ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz

https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz

JSON Output

small variants:

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]

large variants:

"clinvar":[
{
"chromosome":"1",
"begin":629025,
"end":8537745,
"variantType":"copy_number_loss",
"id":"RCV000051993.4",
"variationId":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"alleleOrigins":[
"not provided"
],
"phenotypes":[
"See cases"
],
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21",
"pubMedIds":[
"21844811"
]
},
{
"id":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21"
},
......
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
variantTypestringvariant type
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity

Building the supplementary files

The ClinVar .nsa and .nsi for Nirvana can be built using the SAUtils command's clinvar subcommand.

Source data files

Two input .xml files and a .version file are required in order to build the .nsa and .nsi file. You should have the following files:

ClinVarFullRelease_00-latest.xml.gz     ClinVarVariationRelease_00-latest.xml.gz
ClinVarFullRelease_00-latest.xml.gz.version

The version file is a text file with the following format.

NAME=ClinVar
VERSION=20220505
DATE=2022-05-05
DESCRIPTION=A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence

The help menu for the utility is as follows:

dotnet SAUtils.dll clinvar
---------------------------------------------------------------------------
SAUtils (c) 2022 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.18.1
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll clinvar [options]
Creates a supplementary database with ClinVar annotations

OPTIONS:
--ref, -r <VALUE> compressed reference sequence file
--rcv, -i <VALUE> ClinVar Full release XML file
--vcv, -c <VALUE> ClinVar Variation release XML file
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

dotnet SAUtils.dll clinvar

Here is a sample execution:

dotnet ~/development/Nirvana/bin/Debug/net6.0/SAUtils.dll clinvar \\
--ref ~/development/References/7/Homo_sapiens.GRCh38.Nirvana.dat --rcv ClinVarFullRelease_00-latest.xml.gz \\
--vcv ClinVarVariationRelease_00-latest.xml.gz --out ~/development/SupplementaryDatabase/63/GRCh38
---------------------------------------------------------------------------
SAUtils (c) 2022 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.18.1
---------------------------------------------------------------------------

Found 1535677 VCV records
Unknown vcv id:225946 found in RCV000211201.2
Unknown vcv id:225946 found in RCV000211253.2
Unknown vcv id:225946 found in RCV000211375.2
Unknown vcv id:976117 found in RCV001253316.1
Unknown vcv id:1321016 found in RCV001776995.2
3 unknown VCVs found in RCVs.
225946,976117,1321016
0 unknown VCVs found in RCVs.
Chromosome 1 completed in 00:00:15.1
Chromosome 2 completed in 00:00:20.0
Chromosome 3 completed in 00:00:09.7
Chromosome 4 completed in 00:00:05.9
Chromosome 5 completed in 00:00:09.8
Chromosome 6 completed in 00:00:08.3
Chromosome 7 completed in 00:00:08.7
Chromosome 8 completed in 00:00:06.2
Chromosome 9 completed in 00:00:08.6
Chromosome 10 completed in 00:00:07.0
Chromosome 11 completed in 00:00:11.7
Chromosome 12 completed in 00:00:08.0
Chromosome 13 completed in 00:00:06.3
Chromosome 14 completed in 00:00:06.0
Chromosome 15 completed in 00:00:06.6
Chromosome 16 completed in 00:00:10.8
Chromosome 17 completed in 00:00:13.8
Chromosome 18 completed in 00:00:02.9
Chromosome 19 completed in 00:00:08.7
Chromosome 20 completed in 00:00:03.6
Chromosome 21 completed in 00:00:02.4
Chromosome 22 completed in 00:00:03.6
Chromosome MT completed in 00:00:00.2
Chromosome X completed in 00:00:07.5
Chromosome Y completed in 00:00:00.0
Maximum bp shifted for any variant:2
Writing 37097 intervals to database...

Time: 00:13:26.9

- - - - \ No newline at end of file diff --git a/3.18/data-sources/cosmic-json/index.html b/3.18/data-sources/cosmic-json/index.html deleted file mode 100644 index 41221fe1..00000000 --- a/3.18/data-sources/cosmic-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -cosmic-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

cosmic-json

   "cosmicGeneFusions":[
{
"id":"COSF881",
"numSamples":6,
"geneSymbols":[
"MYB",
"NFIB"
],
"hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",
"histologies":[
{
"name":"adenoid cystic carcinoma",
"numSamples":6
}
],
"sites":[
{
"name":"salivary gland (submandibular)",
"numSamples":1
},
{
"name":"salivary gland (parotid)",
"numSamples":1
},
{
"name":"salivary gland (nasal cavity)",
"numSamples":1
},
{
"name":"breast",
"numSamples":3
}
],
"pubMedIds":[
19841262
]
}
]
FieldTypeNotes
idstringCOSMIC fusion ID
numSamplesint
geneSymbolsstring array5' gene & 3' gene
hgvsrstringHGVS RNA translocation fusion notation
histologiescount arrayphenotypic descriptions
sitescount arraytissue types
pubMedIdsint arrayPubMed IDs

Count

FieldTypeNotes
namestringdescription
numSamplesint
- - - - \ No newline at end of file diff --git a/3.18/data-sources/cosmic/index.html b/3.18/data-sources/cosmic/index.html deleted file mode 100644 index 8d469f96..00000000 --- a/3.18/data-sources/cosmic/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -COSMIC | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

COSMIC

Overview

COSMIC, the Catalogue of Somatic Mutations in Cancer, is the world's largest source of expert manually curated somatic mutation information relating to human cancers.

Publication

John G Tate, Sally Bamford, Harry C Jubb, Zbyslaw Sondka, David M Beare, Nidhi Bindal, Harry Boutselakis, Charlotte G Cole, Celestino Creatore, Elisabeth Dawson, Peter Fish, Bhavana Harsha, Charlie Hathaway, Steve C Jupe, Chai Yin Kok, Kate Noble, Laura Ponting, Christopher C Ramshaw, Claire E Rye, Helen E Speedy, Ray Stefancsik, Sam L Thompson, Shicai Wang, Sari Ward, Peter J Campbell, Simon A Forbes. (2019) COSMIC: the Catalogue Of Somatic Mutations In Cancer, Nucleic Acids Research, Volume 47, Issue D1

Licensed Content

Commercial companies are required to acquire a license from COSMIC. At the moment, this means that our COSMIC content is only available in Illumina's products and services, not in the open source distribution.

Since many of you are academic users, we will enable a COSMIC login in our downloader later this year that will allow academic and commercial organizations (with a license) to access our COSMIC data sources.

Gene Fusions

Gene fusions are manually curated from peer reviewed publications by expert COSMIC curators. A comprehensive literature curation is completed for each fusion pair when it is released in the database. Currently COSMIC includes information on fusions involved in solid tumours and leukaemias.

TSV File

Example

SAMPLE_ID       SAMPLE_NAME     PRIMARY_SITE    SITE_SUBTYPE_1  SITE_SUBTYPE_2  SITE_SUBTYPE_3  PRIMARY_HISTOLOGY      HISTOLOGY_SUBTYPE_1      HISTOLOGY_SUBTYPE_2     HISTOLOGY_SUBTYPE_3     FUSION_ID       TRANSLOCATION_NAME      5'_CHROMOSOME   5'_STRAND       5'_GENE_ID      5'_GENE_NAME    5'_LAST_OBSERVED_EXON   5'_GENOME_START_FROM    5'_GENOME_START_TO      5'_GENOME_STOP_FROM     5'_GENOME_STOP_TO       3'_CHROMOSOME   3'_STRAND       3'_GENE_ID      3'_GENE_NAME   3'_FIRST_OBSERVED_EXON   3'_GENOME_START_FROM    3'_GENOME_START_TO      3'_GENOME_STOP_FROM     3'_GENOME_STOP_TO      FUSION_TYPE      PUBMED_PMID
749711 HCC1187 breast NS NS NS carcinoma ductal_carcinoma NS NS 665 ENST00000360863.10(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452 8 - 197199 RGS22 22 99981937 99981937 100106116 100106116 1 + 212470 SYCP1_ENST00000369518 24 114944339 114944339 114995367 114995367 Inferred Breakpoint 20033038

Parsing

From the TSV file, we're mainly interested in the following columns:

  • SAMPLE_ID
  • PRIMARY_SITE
  • PRIMARY_HISTOLOGY
  • HISTOLOGY_SUBTYPE_1
  • FUSION_ID
  • TRANSLOCATION_NAME
  • PUBMED_PMID
info

For all the histologies and sites, we replace all the underscores with spaces. For example, salivary_gland would become salivary gland.

Aggregation

To create the gene fusion entries in Nirvana, we perform the following on each row in the TSV file:

  • Group all entries by FUSION_ID
  • Using all the entries related to this FUSION_ID:
    • Collect all the PubMed IDs
    • Tally the number of observed sample IDs
    • Grab the HGVS r. notation (should not change throughout the FUSION_ID)
    • Tally the number of samples observed for each histology
    • Tally the number of samples observed for each site
  • Extract the transcript IDs from the HGVS notation and lookup the associated gene symbols

Fixing the HGVS RNA Notation

ENST00000360863.6(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452

There are some issues with the HGVS RNA notation:

  • The two transcripts should be linked by a double colon ::.
  • For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusions.
  • If only the breakpoint is truly known, the recommendation is to use ? marks

We chose to only update the linkage between each transcript using double colons ::. While we could have recalculated the HGVS notation using the supplied breakpoints, we chose not to because the resulting notation would be quite different from the original material. This would potentially lead to some confusion.

Aggregating Histologies

For histologies we want to capture the most specific description available. In the example above, we saw that the primary histology was carcinoma, but the subtype was ductal carcinoma. In this case we would use the subtype for the annotation.

COSMIC uses NS to show that a value is empty. If the subtype is NS, we will use the primary histology instead.

Aggregating Sites

For sites, we observe that the subtype provides additional description but is still dependent on the primary site value. For example, the primary site might be skin, but the subtype is foot. Therefore, we will combine the values in the following manner: skin (foot).

Known Issues

Known Issues

There are some issues with the HGVS RNA notation:

  • The two transcripts should be linked by a double colon ::. We fixed this aspect in Nirvana.
  • For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusions.

Download URL

JSON Output

   "cosmicGeneFusions":[
{
"id":"COSF881",
"numSamples":6,
"geneSymbols":[
"MYB",
"NFIB"
],
"hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",
"histologies":[
{
"name":"adenoid cystic carcinoma",
"numSamples":6
}
],
"sites":[
{
"name":"salivary gland (submandibular)",
"numSamples":1
},
{
"name":"salivary gland (parotid)",
"numSamples":1
},
{
"name":"salivary gland (nasal cavity)",
"numSamples":1
},
{
"name":"breast",
"numSamples":3
}
],
"pubMedIds":[
19841262
]
}
]
FieldTypeNotes
idstringCOSMIC fusion ID
numSamplesint
geneSymbolsstring array5' gene & 3' gene
hgvsrstringHGVS RNA translocation fusion notation
histologiescount arrayphenotypic descriptions
sitescount arraytissue types
pubMedIdsint arrayPubMed IDs

Count

FieldTypeNotes
namestringdescription
numSamplesint
- - - - \ No newline at end of file diff --git a/3.18/data-sources/dann-json/index.html b/3.18/data-sources/dann-json/index.html deleted file mode 100644 index 54adaa7f..00000000 --- a/3.18/data-sources/dann-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dann-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

dann-json

"dannScore": 0.27
FieldTypeNotes
dannScorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.18/data-sources/dann/index.html b/3.18/data-sources/dann/index.html deleted file mode 100644 index 81ba55ad..00000000 --- a/3.18/data-sources/dann/index.html +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - -DANN | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

DANN

Overview

DANN uses the same feature set and training data as CADD (Combined Annotation-Dependent Depletion) to train a deep neural network (DNN). -CADD is an algorithm designed to annotate both coding and non-coding variants, and has been shown to outperform other annotation algorithms. -DANN improves on CADD (which uses Support Vector Machines (SVMs)) by capturing non-linear relationships by using a deep neural network instead of SVMs. -DANN achieves about a 19% relative reduction in the error rate and about a 14% relative increase in the area under the curve (AUC) metric over CADD’s SVM methodology.

Publication

Quang, Daniel, Yifei Chen, and Xiaohui Xie. DANN: a deep learning approach for annotating the pathogenicity of genetic variants. Bioinformatics 31.5 761-763 (2015). https://doi.org/10.1093/bioinformatics/btu703

TSV File

Example

chr     grch37_pos  ref     alt     DANN
1 10001 T A 0.16461391399220135
1 10001 T C 0.4396994049749739
1 10001 T G 0.38108629377072734
1 10002 A C 0.36182020272810128
1 10002 A G 0.44413258111779291
1 10002 A T 0.16812846819989813

Parsing

From the TSV file, we are interested in all columns:

  • chr
  • grch37_pos
  • ref
  • alt
  • DANN

GRCh38 liftover

The data is not available for GRCh38 on the DANN website. We performed a liftover from GRCh37 to GRCh38 using CrossMap.

Known Issues

None

Download URL

https://cbcl.ics.uci.edu/public_data/DANN/

JSON Output

"dannScore": 0.27
FieldTypeNotes
dannScorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.18/data-sources/dbsnp-json/index.html b/3.18/data-sources/dbsnp-json/index.html deleted file mode 100644 index c9dea6be..00000000 --- a/3.18/data-sources/dbsnp-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbsnp-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

dbsnp-json

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.18/data-sources/dbsnp/index.html b/3.18/data-sources/dbsnp/index.html deleted file mode 100644 index 377dd68c..00000000 --- a/3.18/data-sources/dbsnp/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbSNP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

dbSNP

Overview

dbSNP contains human single nucleotide variations, microsatellites, and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations.

Publication

Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP—Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. Genome Res., 9, 677–679.

VCF File

Example

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 10177 rs367896724 A AC . . RS=367896724;RSPOS=10177;dbSNPBuildID=138; \
SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \
VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \
TOPMED=0.76728147298674821,0.23271852701325178

Parsing

From the VCF file, we're mainly interested in the following:

  • rsID from the ID field
  • CAF from the INFO field

Global allele extraction

The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values).

Tie Breaking: Global Major Allele

If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele.

Tie Breaking: Global Minor Allele

If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily.

Equal Allele Frequency Example (2 alleles)

chr1    100 A   C   CAF=0.5,0.5

We will select A to be the global major allele and C to be the global minor allele.

Equal Allele Frequency Example (3 alleles)

chr1    100 A   C,T CAF=0.33,0.33,0.33

We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele.

Equal Allele Frequency in Alternate Alleles

chr1    100 A   C,T CAF=0.2,0.4,0.4

We will select C or T to be arbitrarily assigned to be the global major or global minor allele.

Equal Allele Frequency Between Reference & Alternate Allele

chr1    100 A   C,T CAF=0.2,0.2,0.6

We will select T to be the global major allele and C to be the global minor allele.

Known Issues

Known Issues

If there are multiple entries with different CAF values for the same allele, we use the first CAF value.

Download URL

https://ftp.ncbi.nih.gov/snp/organisms/

JSON Output

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.18/data-sources/decipher-json/index.html b/3.18/data-sources/decipher-json/index.html deleted file mode 100644 index 10d85d32..00000000 --- a/3.18/data-sources/decipher-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -decipher-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

decipher-json

"decipher":[
{
"chromosome":"1",
"begin":13516,
"end":91073,
"numDeletions":27,
"deletionFrequency":0.675,
"numDuplications":27,
"duplicationFrequency":0.675,
"sampleSize":40,
"reciprocalOverlap": 0.27555,
"annotationOverlap": 0.5901
}
],
FieldTypeNotes
chromosomestringEnsembl-style chromosome names
beginint1-based position
endint1-based position
numDeletionsint# of observed deletions
deletionFrequencyfloatdeletion frequency
numDuplicationsint# of observed duplications
duplicationFrequencyfloatduplication frequency
sampleSizeinttotal # of samples
reciprocalOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap
annotationOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap
- - - - \ No newline at end of file diff --git a/3.18/data-sources/decipher/index.html b/3.18/data-sources/decipher/index.html deleted file mode 100644 index a320b8ab..00000000 --- a/3.18/data-sources/decipher/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -DECIPHER | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

DECIPHER

Overview

DECIPHER (DatabasE of genomiC varIation and Phenotype in Humans using Ensembl Resources) is an interactive web-based database which incorporates a suite of tools designed to aid the interpretation of genomic variants.

DECIPHER enhances clinical diagnosis by retrieving information from a variety of bioinformatics resources relevant to the variant found in the patient. The patient's variant is displayed in the context of both normal variation and pathogenic variation reported at that locus thereby facilitating interpretation.

Publication

DECIPHER: Database of Chromosomal Imbalance and Phenotype in Humans using Ensembl Resources. Firth, H.V. et al., 2009. Am.J.Hum.Genet 84, 524-533 (DOI: dx.doi.org/10.1016/j.ajhg.2009.03.010)

TSV Extraction

#population_cnv_id  chr start   end deletion_observations   deletion_frequency  deletion_standard_error duplication_observations    duplication_frequency   duplication_standard_error  observations    frequency   standard_error  type    sample_size study
1 1 10529 177368 0 0 1 3 0.075 0.555277708 3 0.075 0.555277708 1 40 42M calls
2 1 13516 91073 0 0 1 27 0.675 0.109713431 27 0.675 0.109713431 1 40 42M calls
3 1 18888 35451 0 0 1 2 0.002366864 0.706269473 2 0.002366864 0.706269473 1 845 DDD

Parsing

We parse the DECIPHER tsv file and extract the following columns:

  • chr
  • start
  • end
  • deletion_observations
  • deletion_frequency
  • duplication_observations
  • duplication_frequency
  • sample_size

Download URL

https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz -https://www.deciphergenomics.org/files/downloads/population_cnv_grch37.txt.gz

JSON output

"decipher":[
{
"chromosome":"1",
"begin":13516,
"end":91073,
"numDeletions":27,
"deletionFrequency":0.675,
"numDuplications":27,
"duplicationFrequency":0.675,
"sampleSize":40,
"reciprocalOverlap": 0.27555,
"annotationOverlap": 0.5901
}
],
FieldTypeNotes
chromosomestringEnsembl-style chromosome names
beginint1-based position
endint1-based position
numDeletionsint# of observed deletions
deletionFrequencyfloatdeletion frequency
numDuplicationsint# of observed duplications
duplicationFrequencyfloatduplication frequency
sampleSizeinttotal # of samples
reciprocalOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap
annotationOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap
- - - - \ No newline at end of file diff --git a/3.18/data-sources/fusioncatcher-json/index.html b/3.18/data-sources/fusioncatcher-json/index.html deleted file mode 100644 index f26a4224..00000000 --- a/3.18/data-sources/fusioncatcher-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -fusioncatcher-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

fusioncatcher-json

   "fusionCatcher":[
{
"genes":{
"first":{
"hgnc":"ETV6",
"isOncogene":true
},
"second":{
"hgnc":"RUNX1"
},
"isParalogPair":true,
"isPseudogenePair":true,
"isReadthrough":true
},
"germlineSources":[
"1000 Genomes Project"
],
"somaticSources":[
"COSMIC",
"TCGA oesophageal carcinomas"
]
}
]
FieldTypeNotes
genesgenes object5' gene & 3' gene
germlineSourcesstring arraymatches in known germline data sources
somaticSourcesstring arraymatches in known somatic data sources

genes

FieldTypeNotes
firstgene object5' gene
secondgene object3' gene
isParalogPairbooltrue when both genes are paralogs for each other
isPseudogenePairbooltrue when both genes are pseudogenes for each other
isReadthroughbooltrue when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)

gene

FieldTypeNotes
hgncstringgene symbol. e.g. MSH6
isOncogenebooltrue when this gene is an oncogene
- - - - \ No newline at end of file diff --git a/3.18/data-sources/fusioncatcher/index.html b/3.18/data-sources/fusioncatcher/index.html deleted file mode 100644 index 5ef3430a..00000000 --- a/3.18/data-sources/fusioncatcher/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -FusionCatcher | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

FusionCatcher

Overview

FusionCatcher is a well-known tool that searches for somatic novel/known fusion genes, translocations, and/or chimeras in RNA-seq data. While FusionCatcher itself is not part of Nirvana, we have included a subset of their genomic databases in Nirvana.

Publication

Daniel Nicorici, Mihaela Şatalan, Henrik Edgren, Sara Kangaspeska, Astrid Murumägi, Olli Kallioniemi, Sami Virtanen, Olavi Kilkku. (2014) FusionCatcher – a tool for finding somatic fusion genes in paired-end RNA-sequencing data. bioRxiv 011650

Supported Data Sources

Oncogenes

The following data sources are aggregated and used to populate the isOncogene field in the gene JSON object:

DescriptionReferenceDataFusionCatcher filename
Bushmanbushmanlab.orgcancer_genes.txt
ONGENEJGGbioinfo-minzhao.orgoncogenes_more.txt
UniProt tumor genesNARuniprot.orgtumor_genes.txt

Germline

Nirvana labelReferenceDataFusionCatcher filename
1000 Genomes ProjectPLOS ONE1000genomes.txt
Healthy (strong support)banned.txt
Illumina Body Map 2.0EBIbodymap2.txt
CACGGenomicscacg.txt
ConjoinGPLOS ONEconjoing.txt
Healthy prefrontal cortexBMC Medical GenomicsNCBI GEOcortex.txt
Duplicated Genes DatabasePLOS ONEgenouest.orgdgd.txt
GTEx healthy tissuesgtexportal.orggtex.txt
Healthyhealthy.txt
Human Protein AtlasMCPEBIhpa.txt
Babiceanu non-cancer tissuesNARNARnon-cancer_tissues.txt
non-tumor cell linesnon-tumor_cells.txt
TumorFusions normalNARNARtcga-normal.txt

Somatic

Nirvana labelReferenceDataFusionCatcher filename
Alaei-Mahabadi 18 cancersPNAS18cancers.txt
DepMap CCLEdepmap.orgccle.txt
CCLE KlijnNature BiotechnologyNature Biotechnologyccle2.txt
CCLE VellichirammalMolecular Therapy Nucleic Acidsccle3.txt
Cancer Genome ProjectCOSMICcgp.txt
ChimerKB 4.0NARkobic.re.krchimerdb4kb.txt
ChimerPub 4.0NARkobic.re.krchimerdb4pub.txt
ChimerSeq 4.0NARkobic.re.krchimerdb4seq.txt
COSMICNARCOSMICcosmic.txt
Bao gliomasGenome Researchgliomas.txt
Knownknown.txt
Mitelman DBISB-CGCGoogle Cloudmitelman.txt
TCGA oesophageal carcinomasNatureoesophagus.txt
Bailey pancreatic cancersNatureNaturepancreases.txt
PCAWGCellICGCpcawg.txt
Robinson prostate cancersCellCellprostate_cancer.txt
TCGAcancer.govtcga.txt
TumorFusions tumorNARNARtcga-cancer.txt
TCGA GaoCellCelltcga2.txt
TCGA VellichirammalMolecular Therapy Nucleic Acidstcga3.txt
TICdbBMC Genomicsunav.eduticdb.txt

Gene Pair TSV File

Most of the data files in FusionCatcher are two-column TSV files containing the Ensembl gene IDs that are paired together.

Example

Here are the first few lines of the 1000genomes.txt file:

ENSG00000006210 ENSG00000102962
ENSG00000006652 ENSG00000181016
ENSG00000014138 ENSG00000149798
ENSG00000026297 ENSG00000071242
ENSG00000035499 ENSG00000155959
ENSG00000055211 ENSG00000131013
ENSG00000055332 ENSG00000179915
ENSG00000062485 ENSG00000257727
ENSG00000065978 ENSG00000166501
ENSG00000066044 ENSG00000104980

Parsing

In Nirvana, we will only import a gene pair if both Ensembl gene IDs are recognized from either our GRCh37 or GRCh38 cache files.

Gene TSV File

Some of the data files are single-column files containing Ensembl gene IDs. This is commonly used in the data files representing oncogene data sources.

Example

Here are the first few lines of the oncogenes_more.txt file:

ENSG00000000938
ENSG00000003402
ENSG00000005469
ENSG00000005884
ENSG00000006128
ENSG00000006453
ENSG00000006468
ENSG00000007350
ENSG00000008294
ENSG00000008952

Parsing

Known Issues

Known Issues

FusionCatcher also creates custom Ensembl genes (e.g. ENSG09000000002) to handle missing Ensembl genes. Nirvana will ignore these entries since we only include the gene IDs that are currently recognized by Nirvana.

I suspect that these were originally RefSeq genes and if so, we can support those directly in Nirvana in the future.

Download URL

https://sourceforge.net/projects/fusioncatcher/files/data

JSON Output

   "fusionCatcher":[
{
"genes":{
"first":{
"hgnc":"ETV6",
"isOncogene":true
},
"second":{
"hgnc":"RUNX1"
},
"isParalogPair":true,
"isPseudogenePair":true,
"isReadthrough":true
},
"germlineSources":[
"1000 Genomes Project"
],
"somaticSources":[
"COSMIC",
"TCGA oesophageal carcinomas"
]
}
]
FieldTypeNotes
genesgenes object5' gene & 3' gene
germlineSourcesstring arraymatches in known germline data sources
somaticSourcesstring arraymatches in known somatic data sources

genes

FieldTypeNotes
firstgene object5' gene
secondgene object3' gene
isParalogPairbooltrue when both genes are paralogs for each other
isPseudogenePairbooltrue when both genes are pseudogenes for each other
isReadthroughbooltrue when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)

gene

FieldTypeNotes
hgncstringgene symbol. e.g. MSH6
isOncogenebooltrue when this gene is an oncogene
- - - - \ No newline at end of file diff --git a/3.18/data-sources/gerp-json/index.html b/3.18/data-sources/gerp-json/index.html deleted file mode 100644 index fc570683..00000000 --- a/3.18/data-sources/gerp-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gerp-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

gerp-json

"gerpScore": 1.27
FieldTypeNotes
gerpScorefloatRange: -∞ to +∞
- - - - \ No newline at end of file diff --git a/3.18/data-sources/gerp/index.html b/3.18/data-sources/gerp/index.html deleted file mode 100644 index 2a51fc6b..00000000 --- a/3.18/data-sources/gerp/index.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - -GERP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

GERP

Overview

GERP identifies constrained elements in multiple alignments by quantifying substitution deficits. -These deficits represent substitutions that would have occurred if the element were neutral DNA, but did not occur because the element has been under functional constraint (Rejected Substitutions). -Nirvana uses GERP++ which is based on a significantly faster and more statistically robust maximum likelihood estimation procedure to compute expected rates of evolution.

Publication

Davydov, Eugene V., et al. "Identifying a high fraction of the human genome to be under selective constraint using GERP++." PLoS computational biology 6.12 e1001025 (2010). https://doi.org/10.1371/journal.pcbi.1001025

Source Files

Example GRCh37

GRCh37 file is a TSV format

chr     position    GERP
1 12177 0.83
1 12178 -0.206
1 12179 -0.492
1 12180 -1.66
1 12181 0.83
1 12182 0.83
1 12183 -0.417
1 12184 0.83

Example GRCh38

GRCh38 file is a lift-over BED format

chr     pos_start   pos_end     GERP
1 12646 12647 0.298
1 12647 12648 2.63
1 12648 12649 1.87
1 12649 12650 0.252
1 12650 12651 -2.06
1 12651 12652 2.61
1 12652 12653 3.97

Parsing

From the TSV file, we are interested in the following columns:

  • chr
  • position
  • GERP

Known Issues

None

Download URL

GRCh37

http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html

GRCh38

The data is not available for GRCh38 on the GERP++ website, and was obtained from https://personal.broadinstitute.org/konradk/loftee_data/GRCh38/

JSON Output

"gerpScore": 1.27
FieldTypeNotes
gerpScorefloatRange: -∞ to +∞
- - - - \ No newline at end of file diff --git a/3.18/data-sources/gme-json/index.html b/3.18/data-sources/gme-json/index.html deleted file mode 100644 index cc1f9503..00000000 --- a/3.18/data-sources/gme-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gme-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

gme-json

"gmeVariome":{
"allAc":10,
"allAn":202,
"allAf":0.049504,
"failedFilter":true
}
FieldTypeNotes
allAcintGME allele count
allAnintGME allele number
allAffloatGME allele frequency
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.18/data-sources/gme/index.html b/3.18/data-sources/gme/index.html deleted file mode 100644 index f933336b..00000000 --- a/3.18/data-sources/gme/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -GME Variome | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

GME Variome

Overview

The Greater Middle East (GME) Variome Project is aimed at generating a coding base reference for the countries found in the Greater Middle East. Nirvana presents variant frequencies for the Greater Middle Eastern population.

Publication

Scott, E. M., Halees, A., Itan, Y., Spencer, E. G., He, Y., Azab, M. A., Gabriel, S. B., Belkadi, A., Boisson, B., Abel, L., Clark, A. G., Greater Middle East Variome Consortium, Alkuraya, F. S., Casanova, J. L., & Gleeson, J. G. (2016). Characterization of Greater Middle Eastern genetic variation for enhanced disease gene discovery. Nature genetics, 48(9), 1071–1076. https://doi.org/10.1038/ng.3592

TSV Extraction

chrom   pos     ref     alt     AA      filter  FunctionGVS     geneFunction    Gene    GeneID  SIFT_pred       GERP++  AF      GME_GC  GME_AC  GME_AF  NWA     NEA     AP      Israel  SD      TP      CA      FunctionGVS_new Priority        Polyphen2_HVAR_pred     LRT_pred        MutationTaster_pred     rsid    OMIM_MIM        OMIM_Disease    AA_AC   EA_AC   rsid_link       position_link
1 69134 A G A VQSRTrancheSNP99.90to100.00 nonsynonymous_SNV exonic OR4F5 79501 T 2.31 96:0:5 10,192 0.04950495049504951 4:0:0 59:0:2 12:0:0 0:0:0 6:0:0 9:0:2 13:0:2 nonsynonymous_SNV MODERATE B N N none - - none none - http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69134-69133
1 69270 A G A PASS synonymous_SNV exonic OR4F5 79501 . . 93:38:240 518,224 0.6981132075471698 5:5:11 63:30:86 12:5:28 1:0:2 2:2:18 7:3:46 7:2:52 synonymous_SNV LOW . . . rs201219564 - - none none http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs201219564 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69270-69269
1 69428 T G T PASS nonsynonymous_SNV exonic OR4F5 79501 D 0.891 676:44:15 74,1396 0.050340136054421766 43:0:2 313:16:10 88:7:3 6:0:0 44:8:0 102:9:0 102:4:2 nonsynonymous_SNV MODERATE D N N rs140739101 - - 14,3808 313,6535 http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs140739101 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69428-69427

Parsing

We parse the GME tsv file and extract the following columns:

  • chrom
  • pos
  • ref
  • alt
  • filter
  • GME_AC
  • GME_AF

GRCh37 liftover

The data is not available for GRCh38 on the GME website. We performed a liftover from GRCh37 to GRCh38 using CrossMap.

Download URL

http://igm.ucsd.edu/gme/download.shtml

JSON output

"gmeVariome":{
"allAc":10,
"allAn":202,
"allAf":0.049504,
"failedFilter":true
}
FieldTypeNotes
allAcintGME allele count
allAnintGME allele number
allAffloatGME allele frequency
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.18/data-sources/gnomad-lof-json/index.html b/3.18/data-sources/gnomad-lof-json/index.html deleted file mode 100644 index 94fb6152..00000000 --- a/3.18/data-sources/gnomad-lof-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-lof-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

gnomad-lof-json

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)
- - - - \ No newline at end of file diff --git a/3.18/data-sources/gnomad-small-variants-json/index.html b/3.18/data-sources/gnomad-small-variants-json/index.html deleted file mode 100644 index b0eb96ca..00000000 --- a/3.18/data-sources/gnomad-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

gnomad-small-variants-json

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.
- - - - \ No newline at end of file diff --git a/3.18/data-sources/gnomad-structural-variants-data_description/index.html b/3.18/data-sources/gnomad-structural-variants-data_description/index.html deleted file mode 100644 index ec3ea90f..00000000 --- a/3.18/data-sources/gnomad-structural-variants-data_description/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -gnomad-structural-variants-data_description | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

gnomad-structural-variants-data_description

Bed Example

The bed file was obtained from the original source for GRCh37

#chrom  start   end name    svtype  ALGORITHMS  BOTHSIDES_SUPPORT   CHR2    CPX_INTERVALS   CPX_TYPE    END2    ENDEVIDENCE HIGH_SR_BACKGROUND  PCRPLUS_DEPLETED    PESR_GT_OVERDISPERSION  POS2    PROTEIN_CODING__COPY_GAIN   PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC  PROTEIN_CODING__INTRONIC    PROTEIN_CODING__INV_SPAN    PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR    PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER    PROTEIN_CODING__UTR SOURCE  STRANDS SVLEN   SVTYPE  UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN  AC  AF  N_BI_GENOS  N_HOMREF    N_HET   N_HOMALT    FREQ_HOMREF FREQ_HET    FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF   MALE_N_HET  MALE_N_HOMALT   MALE_FREQ_HOMREF    MALE_FREQ_HET   MALE_FREQ_HOMALT    MALE_N_HEMIREF  MALE_N_HEMIALT  MALE_FREQ_HEMIREF   MALE_FREQ_HEMIALT   PAR FEMALE_AN   FEMALE_AC   FEMALE_AF   FEMALE_N_BI_GENOS   FEMALE_N_HOMREF FEMALE_N_HET    FEMALE_N_HOMALT FEMALE_FREQ_HOMREF  FEMALE_FREQ_HET FEMALE_FREQ_HOMALT  POPMAX_AF   AFR_AN  AFR_AC  AFR_AF  AFR_N_BI_GENOS  AFR_N_HOMREF    AFR_N_HET   AFR_N_HOMALT    AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT  AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF   AFR_MALE_N_HET  AFR_MALE_N_HOMALT   AFR_MALE_FREQ_HOMREF    AFR_MALE_FREQ_HET   AFR_MALE_FREQ_HOMALT    AFR_MALE_N_HEMIREF  AFR_MALE_N_HEMIALT  AFR_MALE_FREQ_HEMIREF   AFR_MALE_FREQ_HEMIALT   AFR_FEMALE_AN   AFR_FEMALE_AC   AFR_FEMALE_AF   AFR_FEMALE_N_BI_GENOS   AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET    AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF  AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT  AMR_AN  AMR_AC  AMR_AF  AMR_N_BI_GENOS  AMR_N_HOMREF    AMR_N_HET   AMR_N_HOMALT    AMR_FREQ_HOMREF AMR_FREQ_HET    AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS AMR_MALE_N_HOMREF   AMR_MALE_N_HET  AMR_MALE_N_HOMALT   AMR_MALE_FREQ_HOMREF    AMR_MALE_FREQ_HET   AMR_MALE_FREQ_HOMALT    AMR_MALE_N_HEMIREF  
AMR_MALE_N_HEMIALT  AMR_MALE_FREQ_HEMIREF   AMR_MALE_FREQ_HEMIALT   AMR_FEMALE_AN   AMR_FEMALE_AC   AMR_FEMALE_AF   AMR_FEMALE_N_BI_GENOS   AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET    AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF  AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT  EAS_AN  EAS_AC  EAS_AF  EAS_N_BI_GENOS  EAS_N_HOMREF    EAS_N_HET   EAS_N_HOMALT    EAS_FREQ_HOMREF EAS_FREQ_HET    EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF   EAS_MALE_N_HET  EAS_MALE_N_HOMALT   EAS_MALE_FREQ_HOMREF    EAS_MALE_FREQ_HET   EAS_MALE_FREQ_HOMALT    EAS_MALE_N_HEMIREF  EAS_MALE_N_HEMIALT  EAS_MALE_FREQ_HEMIREF   EAS_MALE_FREQ_HEMIALT   EAS_FEMALE_AN   EAS_FEMALE_AC   EAS_FEMALE_AF   EAS_FEMALE_N_BI_GENOS   EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET    EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF  EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT  EUR_AN  EUR_AC  EUR_AF  EUR_N_BI_GENOS  EUR_N_HOMREF    EUR_N_HET   EUR_N_HOMALT    EUR_FREQ_HOMREF EUR_FREQ_HET    EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF   EUR_MALE_N_HET  EUR_MALE_N_HOMALT   EUR_MALE_FREQ_HOMREF    EUR_MALE_FREQ_HET   EUR_MALE_FREQ_HOMALT    EUR_MALE_N_HEMIREF  EUR_MALE_N_HEMIALT  EUR_MALE_FREQ_HEMIREF   EUR_MALE_FREQ_HEMIALT   EUR_FEMALE_AN   EUR_FEMALE_AC   EUR_FEMALE_AF   EUR_FEMALE_N_BI_GENOS   EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET    EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF  EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT  OTH_AN  OTH_AC  OTH_AF  OTH_N_BI_GENOS  OTH_N_HOMREF    OTH_N_HET   OTH_N_HOMALT    OTH_FREQ_HOMREF OTH_FREQ_HET    OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF   OTH_MALE_N_HET  OTH_MALE_N_HOMALT   OTH_MALE_FREQ_HOMREF    OTH_MALE_FREQ_HET   OTH_MALE_FREQ_HOMALT    OTH_MALE_N_HEMIREF  OTH_MALE_N_HEMIALT  OTH_MALE_FREQ_HEMIREF   OTH_MALE_FREQ_HEMIALT   OTH_FEMALE_AN   OTH_FEMALE_AC   OTH_FEMALE_AF   OTH_FEMALE_N_BI_GENOS   OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET    OTH_FEMALE_N_HOMALT 
OTH_FEMALE_FREQ_HOMREF  OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT  FILTER
1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 0.017857100814580917 0.0UNRESOLVED

TSV Example

The tsv was obtained from the lifted-over dataset created by dbVar for GRCh38

#variant_call_accession variant_call_id variant_call_type   experiment_id   sample_id   sampleset_id    assembly    chrcontig   outer_start start   inner_start inner_stop  stop    outer_stop  insertion_length    variant_region_acc  variant_region_id   copy_number description validation  zygosity    origin  phenotype   hgvs_name   placement_method    placement_rank  placements_per_assembly remap_alignment remap_best_within_cluster   remap_coverage  remap_diff_chr  remap_failure_code  allele_count    allele_frequency    allele_number
nssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0

Structural Variant Type Mapping

The source files represented the structural variants with keys using various naming conventions. -In the Nirvana JSON output, these keys will be mapped according to the following.

Nirvana JSON SV Type KeyGRCh37 Source SV Type KeyGRCh38 Source SV Type Key
copy_number_variationcopy number variation
deletionDEL, CN=0deletion
duplicationDUPduplication
insertionINSinsertion
inversionINVinversion
mobile_element_insertionINS:MEmobile element insertion
mobile_element_insertionINS:ME:ALUalu insertion
mobile_element_insertionINS:ME:LINE1line1 insertion
mobile_element_insertionINS:ME:SVAsva insertion
structural alterationsequence alteration
complex_structural_alterationCPX
- - - - \ No newline at end of file diff --git a/3.18/data-sources/gnomad-structural-variants-json/index.html b/3.18/data-sources/gnomad-structural-variants-json/index.html deleted file mode 100644 index 937e556a..00000000 --- a/3.18/data-sources/gnomad-structural-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-structural-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

gnomad-structural-variants-json

"gnomAD-preview": [
{
"chromosome": "1",
"begin": 40001,
"end": 47200,
"variantId": "gnomAD-SV_v2.1_DUP_1_1",
"variantType": "duplication",
"failedFilter": true,
"allAf": 0.068963,
"afrAf": 0.135694,
"amrAf": 0.022876,
"easAf": 0.01101,
"eurAf": 0.007846,
"othAf": 0.017544,
"femaleAf": 0.065288,
"maleAf": 0.07255,
"allAc": 943,
"afrAc": 866,
"amrAc": 21,
"easAc": 17,
"eurAc": 37,
"othAc": 2,
"femaleAc": 442,
"maleAc": 499,
"allAn": 13674,
"afrAn": 6382,
"amrAn": 918,
"easAn": 1544,
"eurAn": 4716,
"othAn": 114,
"femaleAn": 6770,
"maleAn": 6878,
"allHc": 91,
"afrHc": 90,
"amrHc": 1,
"easHc": 0,
"eurHc": 0,
"othHc": 55,
"femaleHc": 44,
"maleHc": 47,
"reciprocalOverlap": 0.01839,
"annotationOverlap": 0.16667
}
]

FieldTypeNotes
chromosomestringchromosome number
beginintegerposition interval start
endintegerposition interval end
variantTypestringstructural variant type
variantIdstringgnomAD ID
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
othAffloating pointallele frequency for all other populations. Range: 0 - 1.0
femaleAffloating pointallele frequency for female population. Range: 0 - 1.0
maleAffloating pointallele frequency for male population. Range: 0 - 1.0
allAcintegerallele count for all populations.
afrAcintegerallele count for the African super population.
amrAcintegerallele count for the Ad Mixed American super population.
easAcintegerallele count for the East Asian super population.
eurAcintegerallele count for the European super population.
othAcintegerallele count for all other populations.
maleAcintegerallele count for male population.
femaleAcintegerallele count for female population.
allAnintegerallele number for all populations.
afrAnintegerallele number for the African super population.
amrAnintegerallele number for the Ad Mixed American super population.
easAnintegerallele number for the East Asian super population.
eurAnintegerallele number for the European super population.
othAnintegerallele number for all other populations.
femaleAnintegerallele number for female population.
maleAnintegerallele number for male population.
allHcintegercount of homozygous individuals for all populations.
afrHcintegercount of homozygous individuals for the African / African American population.
amrHcintegercount of homozygous individuals for the Latino population.
easHcintegercount of homozygous individuals for the East Asian population.
eurHcintegercount of homozygous individuals for the European super population.
othHcintegercount of homozygous individuals for all other populations.
maleHcintegercount of homozygous individuals for male population.
femaleHcintegercount of homozygous individuals for female population.
failedFilterbooleanTrue if this variant failed any filters (Note: we do not list the failed filters)
reciprocalOverlapfloating pointReciprocal overlap. Range: 0 - 1.0
annotationOverlapfloating pointAnnotation overlap. Range: 0 - 1.0

Note: The following fields are not available in GRCh38 because the source file does not contain this information:

Field
femaleAf
maleAf
maleAc
femaleAc
femaleAn
maleAn
allHc
afrHc
amrHc
easHc
eurHc
othHc
maleHc
femaleHc
failedFilter
- - - - \ No newline at end of file diff --git a/3.18/data-sources/gnomad/index.html b/3.18/data-sources/gnomad/index.html deleted file mode 100644 index 0c5e7261..00000000 --- a/3.18/data-sources/gnomad/index.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - -gnomAD | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

gnomAD

Overview

The Genome Aggregation Database (gnomAD) is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community.

Publication

Koch, L., 2020. Exploring human genomic diversity with gnomAD. Nature Reviews Genetics, 21(8), pp.448-448.

Small Variants

VCF extraction

We currently extract the following info fields from gnomAD genome and exome VCF files:

##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate allele count for samples">
##INFO=<ID=AN,Number=A,Type=Integer,Description="Total number of alleles in samples">
##INFO=<ID=nhomalt,Number=A,Type=Integer,Description="Count of homozygous individuals in samples">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Depth of informative coverage for each sample; reads with MQ=255 or with bad mates are filtered">
##INFO=<ID=lcr,Number=0,Type=Flag,Description="Variant falls within a low complexity region">
##INFO=<ID=AC_afr,Number=A,Type=Integer,Description="Alternate allele count for samples of African-American ancestry">
##INFO=<ID=AN_afr,Number=A,Type=Integer,Description="Total number of alleles in samples of African-American ancestry">
##INFO=<ID=AF_afr,Number=A,Type=Float,Description="Alternate allele frequency in samples of African-American ancestry">
##INFO=<ID=nhomalt_afr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of African-American ancestry">
##INFO=<ID=AC_amr,Number=A,Type=Integer,Description="Alternate allele count for samples of Latino ancestry">
##INFO=<ID=AN_amr,Number=A,Type=Integer,Description="Total number of alleles in samples of Latino ancestry">
##INFO=<ID=nhomalt_amr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Latino ancestry">
##INFO=<ID=AC_eas,Number=A,Type=Integer,Description="Alternate allele count for samples of East Asian ancestry">
##INFO=<ID=AN_eas,Number=A,Type=Integer,Description="Total number of alleles in samples of East Asian ancestry">
##INFO=<ID=nhomalt_eas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of East Asian ancestry">
##INFO=<ID=AC_female,Number=A,Type=Integer,Description="Alternate allele count for female samples">
##INFO=<ID=AN_female,Number=A,Type=Integer,Description="Total number of alleles in female samples">
##INFO=<ID=nhomalt_female,Number=A,Type=Integer,Description="Count of homozygous individuals in female samples">
##INFO=<ID=AC_nfe,Number=A,Type=Integer,Description="Alternate allele count for samples of non-Finnish European ancestry">
##INFO=<ID=AN_nfe,Number=A,Type=Integer,Description="Total number of alleles in samples of non-Finnish European ancestry">
##INFO=<ID=nhomalt_nfe,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of non-Finnish European ancestry">
##INFO=<ID=AC_fin,Number=A,Type=Integer,Description="Alternate allele count for samples of Finnish ancestry">
##INFO=<ID=AN_fin,Number=A,Type=Integer,Description="Total number of alleles in samples of Finnish ancestry">
##INFO=<ID=nhomalt_fin,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Finnish ancestry">
##INFO=<ID=AC_asj,Number=A,Type=Integer,Description="Alternate allele count for samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AN_asj,Number=A,Type=Integer,Description="Total number of alleles in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=nhomalt_asj,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AC_oth,Number=A,Type=Integer,Description="Alternate allele count for samples of uncertain ancestry">
##INFO=<ID=AN_oth,Number=A,Type=Integer,Description="Total number of alleles in samples of uncertain ancestry">
##INFO=<ID=nhomalt_oth,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of uncertain ancestry">
##INFO=<ID=AC_male,Number=A,Type=Integer,Description="Alternate allele count for male samples">
##INFO=<ID=AN_male,Number=A,Type=Integer,Description="Total number of alleles in male samples">
##INFO=<ID=nhomalt_male,Number=A,Type=Integer,Description="Count of homozygous individuals in male samples">
##INFO=<ID=controls_AC,Number=A,Type=Integer,Description="Alternate allele count for samples in the controls subset">
##INFO=<ID=controls_AN,Number=A,Type=Integer,Description="Total number of alleles in samples in the controls subset">

We also extract the following extra fields from gnomAD exome VCF file:

##INFO=<ID=AC_sas,Number=A,Type=Integer,Description="Alternate allele count for samples of South Asian ancestry">
##INFO=<ID=AN_sas,Number=A,Type=Integer,Description="Total number of alleles in samples of South Asian ancestry">
##INFO=<ID=nhomalt_sas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of South Asian ancestry">

Computation

Using these, we compute the following:

  • Coverage
  • Allele count, Homozygous count, allele number and allele frequencies for:
  • Global population
  • African/African Americans
  • Admixed Americans
  • Ashkenazi Jews
  • East Asians
  • Finnish
  • Non-Finnish Europeans
  • South Asian
  • Others (population not assigned)
  • Male
  • Female
  • Controls
Note
  • Coverage = DP / AN. Frequencies are computed using AC/AN for each population.
  • Please note that there is currently no genome sequencing data for the South Asian (SAS) population available in gnomAD.
  • Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.

Merging genomes and exomes

When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets.

info
  • For GRCh37, Nirvana currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output.
  • For GRCh38, Nirvana currently uses gnomAD version 3.0 which doesn't contain the exomes data. Therefore, only genomes data are presented in the output.

Filters

The following strategy will be used when there's a conflict in filter status:

Genomes PASSGenomes Filtered
Exomes PASSPASSOnly use exome data
Exomes FilteredOnly use genome dataFiltered

VCF download instructions

https://gnomad.broadinstitute.org/downloads

JSON output

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.

Building the supplementary files

The gnomAD .nsa for Nirvana can be built using the SAUtils command's gnomad subcommand. We will describe building gnomAD version 3.1 here.

Source data files

Input VCF files (one per chromosome) and a .version file are required in a folder to build the .nsa file. For example, my directory contains:

chr10.vcf.bgz  chr22.vcf.bgz
chr11.vcf.bgz chr2.vcf.bgz
chr12.vcf.bgz chr3.vcf.bgz
chr13.vcf.bgz chr4.vcf.bgz
chr14.vcf.bgz chr5.vcf.bgz
chr15.vcf.bgz chr6.vcf.bgz
chr16.vcf.bgz chr7.vcf.bgz
chr17.vcf.bgz chr8.vcf.bgz
chr18.vcf.bgz chr9.vcf.bgz
chr19.vcf.bgz chrM.vcf.bgz
chr1.vcf.bgz chrX.vcf.bgz
chr20.vcf.bgz chrY.vcf.bgz
chr21.vcf.bgz gnomad.r3.1.version

The version file is a text file with the following content.

NAME=gnomAD
VERSION=3.1
DATE=2020-10-29
DESCRIPTION=Allele frequencies from Genome Aggregation Database (gnomAD)

The help menu for the utility is as follows:

SAUtils.dll gnomad
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll gnomad [options]
Reads provided supplementary data files and populates tsv files

OPTIONS:
--ref, -r <VALUE> compressed reference sequence file
--genome, -g <VALUE> input directory containing VCF (and .version)
files with genomic frequencies
--exome, -e <VALUE> input directory containing VCF (and .version)
files with exomic frequencies
--temp, -t <VALUE> output temp directory for intermediate (per chrom)
NSA files
--out, -o <VALUE> output directory for NSA file
--help, -h displays the help menu
--version, -v displays the version

Here is a sample execution:

dotnet ~/Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll Gnomad \\
--ref ~/References/7/Homo_sapiens.GRCh38.Nirvana.dat --genome genomes/ \\
--out ~/SupplementaryDatabase/63/GRCh38 --temp ~/ExternalDataSources/gnomAD/3.1/GRCh38/temp

LoF Gene Metrics

Tab delimited file example

gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_zmis_z lof_z oe_lof_upper_rank oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfep_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof exac_exp_lof exac_oe_lof brain_expression chromosome start_positionend_position
MED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643

JSON key to TSV column mapping

JSON keyTSV columnDescription
pLipLIprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullpNullprobability of being completely tolerant of loss of function variation (observed = expected)
pRecpRecprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZsyn_zcorrected synonymous Z score
misZmis_zcorrected missense Z score
loeufoe_lof_upperloss of function observed/expected upper bound fraction (LOEUF)

Gene symbol update

The input file provides Ensembl gene IDs for each entry. We observed that they were unique while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene IDs are more stable, and Nirvana transcript cache data contains Ensembl gene IDs, we use these IDs to extract the gene symbols from the transcript cache. For example, if ENSG0001 has gene symbol GENE1 in the input but the Nirvana cache says ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry.

Conflict resolution

gnomAD uses Ensembl GeneIDs as unique identifiers in the source file but Nirvana uses HGNC gene symbols. Multiple Ensembl GeneIDs can map to the same HGNC symbol and therefore may result in conflicts, as in the following two entries for MDGA2:

MDGA2   ENST00000426342 306 4.0043e+02  7.6419e-01  2.1096e-05  4724    78  1.6525e+02  4.7202e-01  1923    125 1.3737e+02  9.0993e-01  7.1973e-06  1413    4   2.0926e-06  453 3.8316e+01  9.9922e-01  8.6490e-12  7.8128e-04  1.0440e-01  7.8600e-01  1.0560e+00  6.9500e-01  8.4000e-01  5.0000e-02  2.3900e-01      8.2988e-01  1.6769e+00  5.1372e+00  1529    0   0   7   2.8103e-05  4.0317e-06  124784  7   0   124791  2.8047e-05  9.8167e-05  0.0000e+00  2.8962e-05  0.0000e+00  0.0000e+00  0.0000e+00  3.5391e-05  1.6672e-04  3.2680e-05  0.0000e+00  2.8962e-05  0.0000e+00  0.0000e+00  0.0000e+00  3.5308e-05  1.6492e-04  3.2678e-05  protein_coding  ENSG00000139915 2   2181    13  protein_coding  835332  9.9322e-01  3   2.7833e+01  1.0779e-01  NA  14  47308826    48144157
MDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 47311134 48143999

In such cases, Nirvana chooses the entry with the smallest "LOEUF" value. The reason for choosing this value can be highlighted by the following table:

LOEUF decileHaplo-insufficientAutosomal DominantAutosomal RecessiveOlfactory Genes
0-10%104140360
10-20%47128721
20-30%17861120
30-40%8801734
40-50%7652068
50-60%4542076
60-70%04615418
70-80%24912049
80-90%0345896
90-100%02640174
Note

List of genes with conflicting entries

MDGA2:
{"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}
{"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}
CRYBG3:
{"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}
{"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}
CHTF8:
{"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}
{"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}
SEPT1:
{"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}
{"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}
ARL14EPL:
{"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}
{"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}
UGT2A1:
{"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}
{"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}
LTB4R2:
{"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}
{"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}
CDRT1:
{"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}
{"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}
MUC3A:
{"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}
{"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}
COG8:
{"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}
{"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}
AC006486.1:
{"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}
{"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}
AL645922.1:
{"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}
{"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}
NBPF20:
{"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}
{"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}
PRAMEF11:
{"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}
{"synZ":-3.33e0,"misZ":-2.59e0}
FAM231D:
{"synZ":-1.98e0,"misZ":-1.44e0}
{"synZ":1.07e0,"misZ":3.13e-1}

Conflict resolution

  • Pick the entry with the lowest LOEUF score
  • If the same, pick the lowest pLI
  • Otherwise pick the entry with the max absolute value of synZ + misZ

Download URL

https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz

JSON output

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)

Structural Variants

Publication

Collins, R.L., Brand, H., Karczewski, K.J. et al. 2020. A structural variation reference for medical and population genetics. Nature 581, pp.444–451. https://doi.org/10.1038/s41586-020-2287-8

Note

  • The gnomAD structural variant annotations are in a preview stage at the moment.
  • Currently, the annotations do not include translocation breakends.
  • Future updates will include a better way of annotating the structural variants.

Source Files

Bed Example

The bed file was obtained from original source for GRCh37

#chrom  start   end name    svtype  ALGORITHMS  BOTHSIDES_SUPPORT   CHR2    CPX_INTERVALS   CPX_TYPE    END2    ENDEVIDENCE HIGH_SR_BACKGROUND  PCRPLUS_DEPLETED    PESR_GT_OVERDISPERSION  POS2    PROTEIN_CODING__COPY_GAIN   PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC  PROTEIN_CODING__INTRONIC    PROTEIN_CODING__INV_SPAN    PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR    PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER    PROTEIN_CODING__UTR SOURCE  STRANDS SVLEN   SVTYPE  UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN  AC  AF  N_BI_GENOS  N_HOMREF    N_HET   N_HOMALT    FREQ_HOMREF FREQ_HET    FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF   MALE_N_HET  MALE_N_HOMALT   MALE_FREQ_HOMREF    MALE_FREQ_HET   MALE_FREQ_HOMALT    MALE_N_HEMIREF  MALE_N_HEMIALT  MALE_FREQ_HEMIREF   MALE_FREQ_HEMIALT   PAR FEMALE_AN   FEMALE_AC   FEMALE_AF   FEMALE_N_BI_GENOS   FEMALE_N_HOMREF FEMALE_N_HET    FEMALE_N_HOMALT FEMALE_FREQ_HOMREF  FEMALE_FREQ_HET FEMALE_FREQ_HOMALT  POPMAX_AF   AFR_AN  AFR_AC  AFR_AF  AFR_N_BI_GENOS  AFR_N_HOMREF    AFR_N_HET   AFR_N_HOMALT    AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT  AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF   AFR_MALE_N_HET  AFR_MALE_N_HOMALT   AFR_MALE_FREQ_HOMREF    AFR_MALE_FREQ_HET   AFR_MALE_FREQ_HOMALT    AFR_MALE_N_HEMIREF  AFR_MALE_N_HEMIALT  AFR_MALE_FREQ_HEMIREF   AFR_MALE_FREQ_HEMIALT   AFR_FEMALE_AN   AFR_FEMALE_AC   AFR_FEMALE_AF   AFR_FEMALE_N_BI_GENOS   AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET    AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF  AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT  AMR_AN  AMR_AC  AMR_AF  AMR_N_BI_GENOS  AMR_N_HOMREF    AMR_N_HET   AMR_N_HOMALT    AMR_FREQ_HOMREF AMR_FREQ_HET    AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS AMR_MALE_N_HOMREF   AMR_MALE_N_HET  AMR_MALE_N_HOMALT   AMR_MALE_FREQ_HOMREF    AMR_MALE_FREQ_HET   AMR_MALE_FREQ_HOMALT    AMR_MALE_N_HEMIREF  
AMR_MALE_N_HEMIALT  AMR_MALE_FREQ_HEMIREF   AMR_MALE_FREQ_HEMIALT   AMR_FEMALE_AN   AMR_FEMALE_AC   AMR_FEMALE_AF   AMR_FEMALE_N_BI_GENOS   AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET    AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF  AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT  EAS_AN  EAS_AC  EAS_AF  EAS_N_BI_GENOS  EAS_N_HOMREF    EAS_N_HET   EAS_N_HOMALT    EAS_FREQ_HOMREF EAS_FREQ_HET    EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF   EAS_MALE_N_HET  EAS_MALE_N_HOMALT   EAS_MALE_FREQ_HOMREF    EAS_MALE_FREQ_HET   EAS_MALE_FREQ_HOMALT    EAS_MALE_N_HEMIREF  EAS_MALE_N_HEMIALT  EAS_MALE_FREQ_HEMIREF   EAS_MALE_FREQ_HEMIALT   EAS_FEMALE_AN   EAS_FEMALE_AC   EAS_FEMALE_AF   EAS_FEMALE_N_BI_GENOS   EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET    EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF  EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT  EUR_AN  EUR_AC  EUR_AF  EUR_N_BI_GENOS  EUR_N_HOMREF    EUR_N_HET   EUR_N_HOMALT    EUR_FREQ_HOMREF EUR_FREQ_HET    EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF   EUR_MALE_N_HET  EUR_MALE_N_HOMALT   EUR_MALE_FREQ_HOMREF    EUR_MALE_FREQ_HET   EUR_MALE_FREQ_HOMALT    EUR_MALE_N_HEMIREF  EUR_MALE_N_HEMIALT  EUR_MALE_FREQ_HEMIREF   EUR_MALE_FREQ_HEMIALT   EUR_FEMALE_AN   EUR_FEMALE_AC   EUR_FEMALE_AF   EUR_FEMALE_N_BI_GENOS   EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET    EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF  EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT  OTH_AN  OTH_AC  OTH_AF  OTH_N_BI_GENOS  OTH_N_HOMREF    OTH_N_HET   OTH_N_HOMALT    OTH_FREQ_HOMREF OTH_FREQ_HET    OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF   OTH_MALE_N_HET  OTH_MALE_N_HOMALT   OTH_MALE_FREQ_HOMREF    OTH_MALE_FREQ_HET   OTH_MALE_FREQ_HOMALT    OTH_MALE_N_HEMIREF  OTH_MALE_N_HEMIALT  OTH_MALE_FREQ_HEMIREF   OTH_MALE_FREQ_HEMIALT   OTH_FEMALE_AN   OTH_FEMALE_AC   OTH_FEMALE_AF   OTH_FEMALE_N_BI_GENOS   OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET    OTH_FEMALE_N_HOMALT 
OTH_FEMALE_FREQ_HOMREF  OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT  FILTER
1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 0.017857100814580917 0.0UNRESOLVED

TSV Example

The tsv was obtained from lifted over dataset created by dbVar for GRCh38

#variant_call_accession variant_call_id variant_call_type   experiment_id   sample_id   sampleset_id    assembly    chrcontig   outer_start start   inner_start inner_stop  stop    outer_stop  insertion_length    variant_region_acc  variant_region_id   copy_number description validation  zygosity    origin  phenotype   hgvs_name   placement_method    placement_rank  placements_per_assembly remap_alignment remap_best_within_cluster   remap_coverage  remap_diff_chr  remap_failure_code  allele_count    allele_frequency    allele_number
nssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0

Structural Variant Type Mapping

The source files represent the structural variants with keys using various naming conventions. In the Nirvana JSON output, these keys are mapped according to the following table.

Nirvana JSON SV Type KeyGRCh37 Source SV Type KeyGRCh38 Source SV Type Key
copy_number_variationcopy number variation
deletionDEL, CN=0deletion
duplicationDUPduplication
insertionINSinsertion
inversionINVinversion
mobile_element_insertionINS:MEmobile element insertion
mobile_element_insertionINS:ME:ALUalu insertion
mobile_element_insertionINS:ME:LINE1line1 insertion
mobile_element_insertionINS:ME:SVAsva insertion
structural alterationsequence alteration
complex_structural_alterationCPX

Download URLs

GRCh37

The GRCh37 file was downloaded from the original source. Following table gives some essential data metrics:

https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.bed.gz

GRCh38

Note: The GRCh38 data was unavailable from the original gnomAD 2.1 source. Instead, we use the lifted-over structural variant dataset created by dbVar, which was obtained from https://www.ncbi.nlm.nih.gov/sites/dbvarapp/studies/nstd166/.

Download URL

https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/tsv/nstd166.GRCh38.variant_call.tsv.gz

JSON output

"gnomAD-preview": [
{
"chromosome": "1",
"begin": 40001,
"end": 47200,
"variantId": "gnomAD-SV_v2.1_DUP_1_1",
"variantType": "duplication",
"failedFilter": true,
"allAf": 0.068963,
"afrAf": 0.135694,
"amrAf": 0.022876,
"easAf": 0.01101,
"eurAf": 0.007846,
"othAf": 0.017544,
"femaleAf": 0.065288,
"maleAf": 0.07255,
"allAc": 943,
"afrAc": 866,
"amrAc": 21,
"easAc": 17,
"eurAc": 37,
"othAc": 2,
"femaleAc": 442,
"maleAc": 499,
"allAn": 13674,
"afrAn": 6382,
"amrAn": 918,
"easAn": 1544,
"eurAn": 4716,
"othAn": 114,
"femaleAn": 6770,
"maleAn": 6878,
"allHc": 91,
"afrHc": 90,
"amrHc": 1,
"easHc": 0,
"eurHc": 0,
"othHc": 55,
"femaleHc": 44,
"maleHc": 47,
"reciprocalOverlap": 0.01839,
"annotationOverlap": 0.16667
}
]

FieldTypeNotes
chromosomestringchromosome number
beginintegerposition interval start
endintegerposition interval end
variantTypestringstructural variant type
variantIdstringgnomAD ID
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
othAffloating pointallele frequency for all other populations. Range: 0 - 1.0
femaleAffloating pointallele frequency for female population. Range: 0 - 1.0
maleAffloating pointallele frequency for male population. Range: 0 - 1.0
allAcintegerallele count for all populations.
afrAcintegerallele count for the African super population.
amrAcintegerallele count for the Ad Mixed American super population.
easAcintegerallele count for the East Asian super population.
eurAcintegerallele count for the European super population.
othAcintegerallele count for all other populations.
maleAcintegerallele count for male population.
femaleAcintegerallele count for female population.
allAnintegerallele number for all populations.
afrAnintegerallele number for the African super population.
amrAnintegerallele number for the Ad Mixed American super population.
easAnintegerallele number for the East Asian super population.
eurAnintegerallele number for the European super population.
othAnintegerallele number for all other populations.
femaleAnintegerallele number for female population.
maleAnintegerallele number for male population.
allHcintegercount of homozygous individuals for all populations.
afrHcintegercount of homozygous individuals for the African / African American population.
amrHcintegercount of homozygous individuals for the Latino population.
easHcintegercount of homozygous individuals for the East Asian population.
eurHcintegercount of homozygous individuals for the European super population.
othHcintegercount of homozygous individuals for all other populations.
maleHcintegercount of homozygous individuals for male population.
femaleHcintegercount of homozygous individuals for female population.
failedFilterbooleanTrue if this variant failed any filters (Note: we do not list the failed filters)
reciprocalOverlapfloating pointReciprocal overlap. Range: 0 - 1.0
annotationOverlapfloating pointAnnotation overlap. Range: 0 - 1.0

Note: The following fields are not available in GRCh38 because the source file does not contain this information:

Field
femaleAf
maleAf
maleAc
femaleAc
femaleAn
maleAn
allHc
afrHc
amrHc
easHc
eurHc
othHc
maleHc
femaleHc
failedFilter
- - - - \ No newline at end of file diff --git a/3.18/data-sources/mito-heteroplasmy/index.html b/3.18/data-sources/mito-heteroplasmy/index.html deleted file mode 100644 index 9d4f83f0..00000000 --- a/3.18/data-sources/mito-heteroplasmy/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Mitochondrial Heteroplasmy | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Mitochondrial Heteroplasmy

Overview

Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline.

JSON File

Example

{
"T:C":{
"ad":[
1,
1,
1,
1,
1,
1
],
"allele_type":"alt",
"vrf":[
0.002369668246445498,
0.0024937655860349127,
0.0016129032258064516,
0.0025188916876574307,
0.0022935779816513763,
0.002008032128514056
],
"vrf_stats":{
"kurtosis":38.889891511122556,
"max":0.0025188916876574307,
"mean":5.4052190471990743e-05,
"min":0.0,
"nobs":246,
"skewness":6.346664692283075,
"stdev":0.0003461416264750575,
"variance":1.1981402557879823e-07
}
}
}

Parsing

From the JSON file, we're mainly interested in the following keys:

  • variant (i.e. T:C)
  • ad
  • vrf
  • nobs (number of observations)
Adjusting for null observations

The nobs value indicates how many observations were made. Ideally this would have been represented in the ad and vrf arrays, but it's left as an exercise for the reader.

Binning VRF Data

The vrf (variant read frequency) array in the JSON object above is paired with the ad (allele depths) array.

The data in the JSON object has a crazy number of significant digits. This means that as the number of samples increases, this array will grow. To make this more future-proof, Nirvana bins everything according to 0.1% increments.

With the binned data, we end up having 775 distinct vrf values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143.

Pre-processing the Data

The JSON file is converted into a small TSV file that is embedded in Nirvana. Here is an example of the TSV file:

#CHROM  POS REF ALT VRF_BINS    VRF_COUNTS
chrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736
chrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736

Algorithm

Nirvana will calculate mitochondrial heteroplasmy data for every sample in the VCF. Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile.

Percentiles

Nirvana uses the statistical definition of percentile (indicating the value below which a given percentage of observations in a group of observations falls). Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1).

Download URL

Unavailable

The original data set is only available internally at Illumina at the moment.

JSON Output

"samples":[
{
"genotype":"0/1",
"variantFrequencies":[
0.333,
0.5
],
"alleleDepths":[
10,
20,
30
],
"heteroplasmyPercentile":[
23.13,
12.65
]
}
]
FieldTypeNotes
heteroplasmyPercentilefloat arrayone percentile for each variant frequency (each alternate allele)
- - - - \ No newline at end of file diff --git a/3.18/data-sources/mitomap-small-variants-json/index.html b/3.18/data-sources/mitomap-small-variants-json/index.html deleted file mode 100644 index 7454faf6..00000000 --- a/3.18/data-sources/mitomap-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -mitomap-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

mitomap-small-variants-json

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele
- - - - \ No newline at end of file diff --git a/3.18/data-sources/mitomap-structural-variants-json/index.html b/3.18/data-sources/mitomap-structural-variants-json/index.html deleted file mode 100644 index d888195a..00000000 --- a/3.18/data-sources/mitomap-structural-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -mitomap-structural-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

mitomap-structural-variants-json

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
- - - - \ No newline at end of file diff --git a/3.18/data-sources/mitomap/index.html b/3.18/data-sources/mitomap/index.html deleted file mode 100644 index 9593a642..00000000 --- a/3.18/data-sources/mitomap/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -MITOMAP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

MITOMAP

Overview

MITOMAP provides a compendium of polymorphisms and mutations in human mitochondrial DNA.

Publication

Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. Current Protocols in Bioinformatics 1(123):1.23.1-26 (2013). http://www.mitomap.org

Scraping HTML Pages

Example

MITOMAP is unique in that it doesn't offer the data in a downloadable format. As a result, the annotation content in Nirvana is scraped from the following MITOMAP pages:

  1. mtDNA Control Region Sequence Variants
  2. mtDNA Coding Region & RNA Sequence Variants
  3. Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations
  4. Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations
  5. Reported mtDNA Deletions
  6. mtDNA Simple Insertions

Parsing

Here's what the HTML code looks like:

["582","<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>","Mitochondrial myopathy","T582C","tRNA Phe","-","+","Reported","<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=582&alt=C&quart=2'><u>72.90%</u></a> <i class='fa fa-arrow-up' style='color:orange' aria-hidden='true'></i></span>","0","<a href='/cgi-bin/print_ref_list?refs=90165,91590&title=RNA+Mutation+T582C' target='_blank'>2</a>"],
["583","<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>","MELAS / MM & EXIT","G583A","tRNA Phe","-","+","Cfrm","<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=583&alt=A&quart=0'><u>93.10%</u></a> <i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i></span>","0","<a href='/cgi-bin/print_ref_list?refs=2066,90532,91590&title=RNA+Mutation+G583A' target='_blank'>3</a>"],

We're mainly interested in the following columns (numbers indicate the HTML page above):

  • Position1,2,3,4
  • Disease3,4
  • Nucleotide Change1,2
  • Allele3,4
  • Homoplasmy3,4
  • Heteroplasmy3,4
  • Status3,4
  • MitoTIP3,4
  • GB Seqs FL(CR)1,2,3,4
  • Deletion Junction5
  • Insert (nt)6
  • Insert Point (nt)6
  • References/Curated References1,2,3,4
MitoTIP

The MitoTIP information is used to populate the clinicalSignificance and scorePercentile JSON keys. The "frequency alert" entries are skipped since they are not directly relevant to clinical significance.

Left alignment

Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions.

Variant Enumeration

Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are C-C(2-8) and A-AC or ACC. Alternate alleles containing IUPAC ambiguity codes are similarly enumerated.

Inversions

MITOMAP inversions are currently treated as MNVs.

Allele Parsing

The following MITOMAP allele parsing conventions are supported:

  • C123T
  • 16021_16022del
  • 8042del2
  • C9537insC
  • 3902_3908invACCTTGC
  • A-AC or ACC
  • C-C(2-8)
  • 8042delAT

PostgreSQL Dump File

Example

COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;
1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177
2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534

Parsing

From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:

  • id
  • nlmid
Why not use the PostgreSQL file for everything?

Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in.

Known Issues

Duplicated records

Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown.

  • For diseases and PubMed IDs, we take the union of the values in the duplicated records.
  • For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.
Skipped records

Records that represent an alternate notation of the original variant are skipped. Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped.

Download URLs

JSON Output

Small Variants

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele

Structural Variants

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring array
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
- - - - \ No newline at end of file diff --git a/3.18/data-sources/omim-json/index.html b/3.18/data-sources/omim-json/index.html deleted file mode 100644 index 44717db4..00000000 --- a/3.18/data-sources/omim-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -omim-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

omim-json

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping
- - - - \ No newline at end of file diff --git a/3.18/data-sources/omim/index.html b/3.18/data-sources/omim/index.html deleted file mode 100644 index d1afb4fb..00000000 --- a/3.18/data-sources/omim/index.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - -OMIM | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

OMIM

Overview

OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily.

Publications

Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: 30445645.

Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM®), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. PMID: 25428349.

Parse OMIM data

Nirvana uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Nirvana. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to a Nirvana gene symbol are further processed. The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols.

mim2gene.txt

This mim2gene.txt (http://omim.org/static/omim/data/mim2gene.txt) file provides the mapping between MIM numbers and gene symbols. An example of this file is given below:

# MIM Number    MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq)   Entrez Gene ID (NCBI)   Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)
100050 predominantly phenotypes
100070 phenotype 100329167
100100 phenotype
100200 predominantly phenotypes
100300 phenotype
100500 moved/removed
100600 phenotype
100640 gene 216 ALDH1A1 ENSG00000165092
100650 gene/phenotype 217 ALDH2 ENSG00000111275
100660 gene 218 ALDH3A1 ENSG00000108602
100670 gene 219 ALDH1B1 ENSG00000137124
100675 predominantly phenotypes
100678 gene 39 ACAT2 ENSG00000120437

The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns is used to find the proper gene symbol supported by Nirvana, which may or may not be the same as the gene symbol listed here.

OMIM API

Nirvana retrieves the OMIM annotations from the OMIM API JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. A sample JSON response from the API is provided below.

{
"omim": {
"version": "1.0",
"entryList": [
{
"entry": {
"prefix": "*",
"mimNumber": 100640,
"status": "live",
"titles": {
"preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",
"alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\nACETALDEHYDE DEHYDROGENASE 1;;\nALDH, LIVER CYTOSOLIC;;\nRETINAL DEHYDROGENASE 1; RALDH1"
},
"textSectionList": [
{
"textSection": {
"textSectionName": "description",
"textSectionTitle": "Description",
"textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\n\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."
}
}
],
"geneMap": {
"sequenceID": 7709,
"chromosome": 9,
"chromosomeSymbol": "9",
"chromosomeSort": 225,
"chromosomeLocationStart": 72900670,
"chromosomeLocationEnd": 72953052,
"transcript": "ENST00000297785.7",
"cytoLocation": "9q21",
"computedCytoLocation": "9q21.13",
"mimNumber": 100640,
"geneSymbols": "ALDH1A1",
"geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",
"mappingMethod": "REa, A",
"confidence": "P",
"mouseGeneSymbol": "Aldh1a1",
"mouseMgiID": "MGI:1353450",
"geneInheritance": null
},
"externalLinks": {
"geneIDs": "216",
"hgncID": "402",
"ensemblIDs": "ENSG00000165092,ENST00000297785.8",
"approvedGeneSymbols": "ALDH1A1",
"ncbiReferenceSequences": "1519246465",
"proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",
"uniGenes": "Hs.76392",
"swissProtIDs": "P00352",
"decipherGene": false,
"umlsIDs": "C1412333",
"gtr": true,
"cmgGene": false,
"keggPathways": true,
"gwasCatalog": false,

}
}
},
{
"entry": {
"prefix": "*",
"mimNumber": 102560,
"status": "live",
"titles": {
"preferredTitle": "ACTIN, GAMMA-1; ACTG1",
"alternativeTitles": "ACTIN, GAMMA; ACTG;;\nCYTOSKELETAL GAMMA-ACTIN;;\nACTIN, CYTOPLASMIC, 2"
},
"textSectionList": [
{
"textSection": {
"textSectionName": "description",
"textSectionTitle": "Description",
"textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."
}
}
],
"geneMap": {
"sequenceID": 13666,
"chromosome": 17,
"chromosomeSymbol": "17",
"chromosomeSort": 947,
"chromosomeLocationStart": 81509970,
"chromosomeLocationEnd": 81512798,
"transcript": "ENST00000331925.7",
"cytoLocation": "17q25.3",
"computedCytoLocation": "17q25.3",
"mimNumber": 102560,
"geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",
"geneName": "Actin, gamma-1",
"mappingMethod": "REa, A, Fd",
"confidence": "C",
"mouseGeneSymbol": "Actg1",
"mouseMgiID": "MGI:87906",
"geneInheritance": null,
"phenotypeMapList": [
{
"phenotypeMap": {
"mimNumber": 102560,
"phenotype": "Baraitser-Winter syndrome 2",
"phenotypeMimNumber": 614583,
"phenotypicSeriesNumber": "PS243310",
"phenotypeMappingKey": 3,
"phenotypeInheritance": "Autosomal dominant"
}
},
{
"phenotypeMap": {
"mimNumber": 102560,
"phenotype": "Deafness, autosomal dominant 20/26",
"phenotypeMimNumber": 604717,
"phenotypicSeriesNumber": "PS124900",
"phenotypeMappingKey": 3,
"phenotypeInheritance": "Autosomal dominant"
}
}
]
}
}
}
]
}
}

Content from the OMIM API JSON response is reorganized as shown in the Nirvana JSON Output

Mappings between the Nirvana JSON output and OMIM JSON API are listed in the table below:

Nirvana JSON key chainOMIM API JSON key chain
omim:mimNumberomim:entryList:entry:mimNumber
omim:geneNameomim:entryList:entry:geneMap:geneName
omim:descriptionomim:entryList:entry:textSectionList:textSection:textSectionContent
omim:phenotypes:mimNumberomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber
omim:phenotypes:phenotypeomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype
omim:phenotypes:descriptionomim:entryList:entry:textSectionList:textSection:textSectionContent
omim:phenotypes:mappingomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (see mapping below)
omim:phenotypes:inheritancesomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance
omim:phenotypes:commentsomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (see mapping below)

Mapping key to content

1 to disorder was positioned by mapping of the wild type gene
-2 to disease phenotype itself was mapped
-3 to molecular basis of the disorder is known
-4 to disorder is a chromosome deletion or duplication syndrome

Phenotype character to comment

? to unconfirmed or possibly spurious mapping
-[/] to nondiseases
-{/} to contribute to susceptibility to multifactorial disorders or to susceptibility to infection

There are different types of links in the OMIM description section. For example, in the above JSON response, we have the description of MIM entry 100640:

The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\n\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).

As the descriptions will be shown as plain text, we remove the curly brackets surrounding links and try to keep the text readable with minimal modifications. Briefly:

  • Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.
  • Links referring to a literature reference will be processed to remove the internal index and curly brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".
  • All the other links will simply have their curly brackets removed. For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".
  • If the content within a pair of parentheses becomes empty after being processed, the parentheses need to be removed as well and the surrounding whitespace should be adjusted accordingly. For example, "ALDH2 ({100650})," will become "ALDH2,".

Here is a list of examples showing how the description section is supposed to be processed:

Original textProcessed text
({516030}, {516040}, and {516050})
(e.g., D1, {168461}; D2, {123833}; D3, {123834})(e.g., D1; D2; D3)
(desmocollins; see DSC2, {125645})(desmocollins; see DSC2)
(e.g., see {102700}, {300755})
(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})(ADH). See also liver mitochondrial ALDH2
(see, e.g., CACNA1A; {601011})(see, e.g., CACNA1A)
(e.g., GSTA1; {138359}), mu (e.g., {138350})(e.g., GSTA1), mu
(NFKB; see {164011})(NFKB)
(see ISGF3G, {147574})(see ISGF3G)
(DCK; {EC 2.7.1.74}; {125450})(DCK; EC 2.7.1.74)

JSON output

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping

Building the supplementary files

The first step in building the OMIM .nga files is to use the SAUtils command's subcommand downloadOMIM to download the necessary data. In order to download the data, the user must possess an API key obtained from OMIM. This key has to be set as the environment variable OmimApiKey.

export OmimApiKey=<users-omim-api-key>
dotnet NirvanaBuild/SAUtils.dll downloadOMIM
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll downloadomim [options]
Download the OMIM gene annotation data

OPTIONS:
--uga, -u <path> universal gene archive path
--ref, -r <filename> input reference filename
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

dotnet NirvanaBuild/SAUtils.dll downloadOMIM --ref References/7/Homo_sapiens.GRCh38.Nirvana.dat --uga Cache/27/UGA.tsv.gz --out ExternalDataSources/OMIM/2021-06-14
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------

Unable to resolve gene symbol conflict for CD300H: Ensembl: [ENSG00000284690]: AC079325.2, Entrez Gene: [100130520]: LOC100130520
Unable to resolve gene symbol conflict for STRIT1: Ensembl: [ENSG00000240045]: DWORF, Entrez Gene: [100507537]: LOC100507537
Unable to resolve gene symbol conflict for WAKMAR2: Ensembl: [ENSG00000237499]: AL357060.2, Entrez Gene: [100130476]: LOC100130476
Unable to resolve gene symbol conflict for PERCC1: Ensembl: [ENSG00000284395]: AL032819.3, Entrez Gene: [105371045]: LOC105371045
Unable to resolve gene symbol conflict for LASTR: Ensembl: [ENSG00000242147]: AL365356.5, Entrez Gene: [105376382]: LOC105376382
Unable to resolve gene symbol conflict for PRANCR: Ensembl: [ENSG00000257815]: LINC01481, Entrez Gene: [101928062]: LOC101928062
Unable to resolve gene symbol conflict for THORLNC: Ensembl: [ENSG00000226856]: AC093901.1, Entrez Gene: [100506797]: LOC100506797
Gene Symbol Update Statistics
============================================
# of gene symbols already up-to-date: 15,952
# of gene symbols updated: 330
# of genes where both IDs are null: 0
# of gene symbols not in cache: 148
# of resolved gene symbol conflicts: 15
# of unresolved gene symbol conflicts: 7

Time: 00:02:38.2

Once the download has succeeded, the nga files can be produced using the SAUtils command's subcommand omim.

dotnet NirvanaBuild/SAUtils.dll omim
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll omim [options]
Creates a gene annotation database from OMIM data

OPTIONS:
--m2g, -m <VALUE> MimToGeneSymbol tsv file
--json, -j <VALUE> OMIM entry json file
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version


dotnet NirvanaBuild/SAUtils.dll omim --m2g ExternalDataSources/OMIM/2021-06-14/MimToGeneSymbol.tsv --json ExternalDataSources/OMIM/2021-06-14/MimEntries.json.gz --out SupplementaryDatabase/63/
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0
---------------------------------------------------------------------------


Time: 00:00:04.5
- - - - \ No newline at end of file diff --git a/3.18/data-sources/phylop-json/index.html b/3.18/data-sources/phylop-json/index.html deleted file mode 100644 index e4b44704..00000000 --- a/3.18/data-sources/phylop-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -phylop-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

phylop-json

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"phylopScore":0.459
}
]
FieldTypeNotes
phylopScorefloatrange: -14.08 to 6.424
- - - - \ No newline at end of file diff --git a/3.18/data-sources/phylop/index.html b/3.18/data-sources/phylop/index.html deleted file mode 100644 index 97f74565..00000000 --- a/3.18/data-sources/phylop/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -PhyloP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

PhyloP

Overview

PhyloP (phylogenetic p-values) conservation scores are obtained from the PHAST package (http://compgen.bscb.cornell.edu/phast/) for multiple alignments of vertebrate genomes to the human genome. For GRCh38, the multiple alignments are against 19 mammals and for GRCh37, they are against 45 vertebrate genomes.

Publication

Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. Genome Res. 2005 Aug;15(8):1034-50. (http://www.genome.org/cgi/doi/10.1101/gr.3715005)

WigFix File

The data is provided in WigFix files, which are text files that provide conservation scores for contiguous intervals in the following format:

fixedStep chrom=chr1 start=10918 step=1
0.064
0.058
0.064
0.058
0.064
0.064
fixedStep chrom=chr1 start=34045 step=1
0.111
0.100
0.111
0.111
0.100
0.111
0.111
0.111
0.100
0.111
-1.636

We convert them to binary files with indexes for fast query. Note that these are scores for genomic positions and are reported only for SNVs.

Download URL

GRCh37: http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/

GRCh38: http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/

JSON Output

Unlike other supplementary data sources, phyloP scores are reported in the variants section.

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"phylopScore":0.459
}
]
FieldTypeNotes
phylopScorefloatrange: -14.08 to 6.424
- - - - \ No newline at end of file diff --git a/3.18/data-sources/primate-ai-json/index.html b/3.18/data-sources/primate-ai-json/index.html deleted file mode 100644 index 822398a6..00000000 --- a/3.18/data-sources/primate-ai-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -primate-ai-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

primate-ai-json

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3,
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.18/data-sources/primate-ai/index.html b/3.18/data-sources/primate-ai/index.html deleted file mode 100644 index b2659eb7..00000000 --- a/3.18/data-sources/primate-ai/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Primate AI | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Primate AI

Overview

Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations. The method is described in the publication:

Publication

Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. Nat Genet 50, 1161–1170 (2018). https://doi.org/10.1038/s41588-018-0167-z

TSV File

Example

chr pos ref alt refAA   altAA   strand_1pos_0neg    trinucleotide_context   UCSC_gene   ExAC_coverage   primateDL_score
chr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239
chr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546

Parsing

From the TSV file, we're mainly interested in the following columns:

  • chr
  • pos
  • ref
  • alt
  • primateDL_score

We also use UCSC_gene to filter out variants that don't have matching gene models in Nirvana.

Pre-processing

Converting UCSC IDs

Primate AI only provides UCSC IDs. As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs.

The following queries are used to download the conversions from UCSC:

mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \
-e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv

mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \
-e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \
hg19 > ucsc_ensembl.tsv

Running the Pre-Processor

The Primate AI pre-processor can be run as follows:

dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \
ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz

During conversion, 0.5% of the UCSC IDs cannot be converted to either Entrez or Ensembl gene IDs. Once the gene IDs have been acquired, we check to see which are available in Nirvana.

The following Entrez Gene IDs were not found:

399753
401980
504189
504191
100293534

Here is the output from the pre-processor:

- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.
- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.
- loading UGA gene ID to gene dictionary... 103,277 genes loaded.
- parsing Primate AI variants... 70,121,953 variants parsed.

# variants with unknown gene ID: 27,253 / 70,121,953
# genes with unknown gene ID: 109 / 19,614

# variants not in UGA: 2,036 / 70,121,953
# genes not in UGA: 6 / 19,614

Known Issues

Known Issues

The Primate AI data set provides raw scores, but the scores are biased according to gene context. I.e. a 0.4 means something different in TP53 than it does in KRAS.

As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. According to their research, the 25th percentile is a good proxy for benign variants and the 75th percentile is a good proxy for pathogenic variants.

Download URL

https://basespace.illumina.com/s/cPgCSmecvhb4

JSON Output

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.18/data-sources/revel-json/index.html b/3.18/data-sources/revel-json/index.html deleted file mode 100644 index efbec6c7..00000000 --- a/3.18/data-sources/revel-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -revel-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

revel-json

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.18/data-sources/revel/index.html b/3.18/data-sources/revel/index.html deleted file mode 100644 index 5f79d51c..00000000 --- a/3.18/data-sources/revel/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -REVEL | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

REVEL

Overview

REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons.

Publication

Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. The American Journal of Human Genetics 99, 877-885 (2016). https://doi.org/10.1016/j.ajhg.2016.08.016

CSV File

Example

chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL
1,35142,35142,G,A,T,M,0.027
1,35142,35142,G,C,T,R,0.035
1,35142,35142,G,T,T,K,0.043
1,35143,35143,T,A,T,S,0.018
1,35143,35143,T,C,T,A,0.034

Parsing

From the CSV file, we're mainly interested in the following columns:

  • chr
  • hg19_pos
  • grch38_pos
  • ref
  • alt
  • REVEL

Known Issues

Sorting

Since the input file contains positions for both GRCh37 and GRCh38, we split it into two TSV files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file.

Conflicting Scores

When there are multiple scores available for the same variant (i.e. the same position with the same alternative allele), we pick the highest score.

Download URL

https://sites.google.com/site/revelgenomics/downloads

JSON Output

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.18/data-sources/splice-ai-json/index.html b/3.18/data-sources/splice-ai-json/index.html deleted file mode 100644 index f0a4fc14..00000000 --- a/3.18/data-sources/splice-ai-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -splice-ai-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

splice-ai-json

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place
- - - - \ No newline at end of file diff --git a/3.18/data-sources/splice-ai/index.html b/3.18/data-sources/splice-ai/index.html deleted file mode 100644 index bbe0a79a..00000000 --- a/3.18/data-sources/splice-ai/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Splice AI | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Splice AI

Overview

SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence.

Publication

K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. Cell, 176 (3) (2019), pp. 535-548 e24

VCF File

Example

##fileformat=VCFv4.0
##assembly=GRCh37/hg19
##INFO=<ID=SYMBOL,Number=1,Type=String,Description="HGNC gene symbol">
##INFO=<ID=STRAND,Number=1,Type=String,Description="+ or - depending on whether the gene lies in the positive or negative strand">
##INFO=<ID=TYPE,Number=1,Type=String,Description="E or I depending on whether the variant position is exonic or intronic (GENCODE V24lift37 canonical annotation)">
##INFO=<ID=DIST,Number=1,Type=Integer,Description="Distance between the variant position and the closest splice site (GENCODE V24lift37 canonical annotation)">
##INFO=<ID=DS_AG,Number=1,Type=Float,Description="Delta score (acceptor gain)">
##INFO=<ID=DS_AL,Number=1,Type=Float,Description="Delta score (acceptor loss)">
##INFO=<ID=DS_DG,Number=1,Type=Float,Description="Delta score (donor gain)">
##INFO=<ID=DS_DL,Number=1,Type=Float,Description="Delta score (donor loss)">
##INFO=<ID=DP_AG,Number=1,Type=Integer,Description="Delta position (acceptor gain) relative to the variant position">
##INFO=<ID=DP_AL,Number=1,Type=Integer,Description="Delta position (acceptor loss) relative to the variant position">
##INFO=<ID=DP_DG,Number=1,Type=Integer,Description="Delta position (donor gain) relative to the variant position">
##INFO=<ID=DP_DL,Number=1,Type=Integer,Description="Delta position (donor loss) relative to the variant position">
#CHROM POS ID REF ALT QUAL FILTER INFO
10 92946 . C T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35
10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1
10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21
10 92947 . A C . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34
10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34
10 92947 . A G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32

Parsing

From the VCF file, we're mainly interested in the following columns:

  • DS_AG - Δ score (acceptor gain)
  • DS_AL - Δ score (acceptor loss)
  • DS_DG - Δ score (donor gain)
  • DS_DL - Δ score (donor loss)
  • DP_AG - Δ position (acceptor gain) relative to the variant position
  • DP_AL - Δ position (acceptor loss) relative to the variant position
  • DP_DG - Δ position (donor gain) relative to the variant position
  • DP_DL - Δ position (donor loss) relative to the variant position

The Splice AI team suggests the following interpretation for the scores:

RangeConfidencePathogenicity
0 ≤ x < 0.1lowlikely benign
0.1 ≤ x ≤ 0.5mediumlikely pathogenic
x > 0.5highpathogenic

Pre-processing

Filtering

Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value (e.g. low splice scores observed in intergenic regions). Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed.

As a result, Nirvana filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism.

Download URL

https://basespace.illumina.com/s/5u6ThOblecrh

JSON Output

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place
- - - - \ No newline at end of file diff --git a/3.18/data-sources/topmed-json/index.html b/3.18/data-sources/topmed-json/index.html deleted file mode 100644 index 010a92c1..00000000 --- a/3.18/data-sources/topmed-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -topmed-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

topmed-json

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.18/data-sources/topmed/index.html b/3.18/data-sources/topmed/index.html deleted file mode 100644 index 0eec0d55..00000000 --- a/3.18/data-sources/topmed/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -TOPMed | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

TOPMed

Overview

The Trans-Omics for Precision Medicine (TOPMed) program, sponsored by the National Institutes of Health (NIH) National Heart, Lung and Blood Institute (NHLBI), is part of a broader Precision Medicine Initiative, which aims to provide disease treatments tailored to an individual’s unique genes and environment. TOPMed contributes to this Initiative through the integration of whole-genome sequencing (WGS) and other omics (e.g., metabolic profiles, epigenomics, protein and RNA expression patterns) data with molecular, behavioral, imaging, environmental, and clinical data.

Publication

Kowalski, M.H., Qian, H., Hou, Z., Rosen, J.D., Tapia, A.L., Shan, Y., Jain, D., Argos, M., Arnett, D.K., Avery, C. and Barnes, K.C., 2019. Use of >100,000 NHLBI Trans-Omics for Precision Medicine (TOPMed) Consortium whole genome sequences improves imputation quality and detection of rare variant associations in admixed African and Hispanic/Latino populations. PLoS genetics, 15(12), p.e1008500.

VCF extraction

We currently extract the following fields from TOPMed VCF file:

##INFO=<ID=AN,Number=1,Type=Integer,Description="Number of Alleles in Samples with Coverage">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts in Samples with Coverage">
##INFO=<ID=AF,Number=A,Type=Float,Description="Alternate Allele Frequencies">
##INFO=<ID=Het,Number=A,Type=Integer,Description="Number of samples with heterozygous genotype calls">
##INFO=<ID=Hom,Number=A,Type=Integer,Description="Number of samples with homozygous alternate genotype calls">

Example:

chr1    10132   TOPMed_freeze_5?chr1:10,132     T       C       255     SVM     VRT=1;NS=62784;AN=125568;AC=32;AF=0.000254842;Het=32;Hom=0      NA:FRQ  125568:0.000254842

GRCh37 liftover

The data is not available for GRCh37 on TOPMed website. We performed a liftover from GRCh38 to GRCh37 using dbSNP ids.

Download URL

https://bravo.sph.umich.edu/freeze5/hg38/download

JSON output

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.18/file-formats/custom-annotations/index.html b/3.18/file-formats/custom-annotations/index.html deleted file mode 100644 index 7df35dc2..00000000 --- a/3.18/file-formats/custom-annotations/index.html +++ /dev/null @@ -1,40 +0,0 @@ - - - - - - - -Custom Annotations | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Custom Annotations

Overview

While the team tries to keep data sources up-to-date, you might want to start incorporating new annotations ahead of our update cycle. Another -common use case involves protected health information (PHI). Custom annotations are a mechanism that enables both use cases.

Here are some examples of how our collaborators use custom annotations:

  • associating context from both a patient-level and a patient cohort level with the variant annotations
  • adding content that is licensed (e.g. HGMD) to the variant annotations

At the moment, we have two different custom annotation file formats. One provides additional annotations to variants (both small variants and SVs) -while the other caters to gene annotations.

In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data.

The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how -Nirvana should match the variants.

At Illumina, there are usually many components downstream of Nirvana that have to parse our annotations. If a customer provides a custom -annotation, those downstream tools need to understand more about the data such as:

  • data type (e.g. number, boolean, or a string)
  • data category (e.g. is this an allele count, allele number, allele frequency, etc.)
  • associated population (i.e. if this is an allele frequency)

For each custom annotation, Nirvana uses this context to create a JSON schema that can be sent to downstream tools. If -a tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of [0, 1].

Variant File Format

File Format

Nirvana expects plain text (or gzipped text) files. Using tools like Excel can add extra characters that can break parsing. We highly recommend creating and modifying these files with a plain text editor such as Notepad, Notepad++ or Atom.

Basic Allele Frequency Example

Create the Custom Annotation TSV

Imagine that you want to create a basic allele frequency custom annotation for small variants. If we visualized the tab-delimited file -(TSV), it would look something like this:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTallAf
#categories...AlleleFrequency
#descriptions...ALL
#type...number
chr1623603511TGAT0.000006579
chr1668801894GA0.000006569
chr1911107436GA0.00003291

Here's the full TSV file.

Let's go over the header and discuss the contents:

  • title indicates the name of the JSON key
  • assembly indicates that this data is only valid for GRCh38.
  • matchVariantsBy indicates how annotations should be matched and reported. In this case annotations will be matched and reported by allele.
  • categories provides hints to downstream tools on how they might want to treat the data. In this case, we indicate that it's an allele frequency.
  • descriptions are used in special circumstances to provide more context. Even though column 5 is called allAf, it might not be clear to a -downstream tool that this means a global allele frequency using all sub-populations. In this case, ALL indicates the intended population.
  • type indicates to downstream tools the data type. Since allele frequencies are numbers, we'll write number in this column.
Reference Base Checking

Nirvana validates all the reference bases in a custom annotation. If a variant or genomic region is specified that has the wrong reference base, an error will be produced.

Sorting

The variants within each chromosome must be sorted by genomic position.

Convert to Nirvana Format

First we need to convert the TSV file to Nirvana's native file format and let's put that file in a new directory called CA:

$ mkdir CA
$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \
-r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA
---------------------------------------------------------------------------
SAUtils (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Chromosome 16 completed in 00:00:00.1
Chromosome 19 completed in 00:00:00.0

Time: 00:00:00.2

Annotate with Nirvana

Let's annotate the following VCF (notice that it's one of the variants that we have in our custom annotation):

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 68801894 . G A . . .

Here's the full VCF file.

Since Nirvana can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to -the normal Nirvana command-line.

$ dotnet bin/Release/netcoreapp2.1/Nirvana.dll -c Data/Cache/GRCh38/Both \
-r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \
--sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA
---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:01.8
SA Position Scan 00:00:00.0 19

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
chr16 00:00:00.2 00:00:01.3 1

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:01.9 25.5 %
Preload 00:00:00.2 3.3 %
Annotation 00:00:01.3 18.2 %

Time: 00:00:06.3

Investigate the Results

We would expect the following data to show up in our JSON output file:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": {
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06
},
"clinvar": [

Here's the full JSON file.

Nirvana preserves up to 6 decimal places for allele frequency data.

Categories & Descriptions Example

Create the Custom Annotation TSV

Building on the previous example, we can add other types of annotations like predictions and general notes.

Col 1Col 2Col 3Col 4Col 5Col 6Col 7
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTallAfpathogenicitynotes
#categories...AlleleFrequencyPrediction.
#descriptions...ALL..
#type...numberstringstring
chr1623603511TGAT0.000006579P.
chr1668801894GA0.000006569LPSeen in case 123
chr1911107436GA0.00003291..

Here's the full TSV file.

Placeholders

You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). While -Nirvana also accepts empty columns in the TSV file, we use periods in these examples to promote readability.

Let's go over what's new in this example:

  • Column 6 adds a field called pathogenicity which uses the Prediction category. When using this category, Nirvana will -validate to make -sure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic).
  • Column 7 adds a field called notes and it doesn't have a category or description. We're just going to use it to add some internal -notes.

Annotate with Nirvana

Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the -alternate allele (allele-specific match):

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 23603511 . TG T . . .
16 68801894 . G A . . .
19 11107436 . G C . . .

Here's the full VCF file.

Investigate the Results

Because we specified #matchVariantsBy=allele in our custom annotation file, only the middle variant will get an annotation:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": {
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06,
"pathogenicity": "LP",
"notes": "Seen in case 123"
},
"clinvar": [

Here's the full JSON file.

Using Positional Matches

What would happen if we changed to #matchVariantsBy=position? Two things will happen. First, our positional variants will now match:

      "variants": [
{
"vid": "16-23603511-TG-T",
"chromosome": "16",
"begin": 23603512,
"end": 23603512,
"refAllele": "G",
"altAllele": "-",
"variantType": "deletion",
"hgvsg": "NC_000016.10:g.23603512delG",
"MyDataSource": [
{
"refAllele": "GA",
"altAllele": "-",
"allAf": 7e-06,
"pathogenicity": "P"
}
],
"clinvar": [

In addition, you will now see an extra flag for our allele-specific variant:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": [
{
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06,
"pathogenicity": "LP",
"notes": "Seen in case 123",
"isAlleleSpecific": true
}
],
"clinvar": [

Genomic Region Example

Create the Custom Annotation TSV

In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. Consider the following example:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFENDnotes
#categories....
#descriptions....
#type...string
chr1620000000T70000000Lots of false positives in this region

Here's the full TSV file.

Let's go over what's new in this example:

  • Column 5 now has a field called notes. In essence, it looks exactly like column 7 from our previous example.
  • The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.

In the previous example we learned about positional matching vs allele-specific matching. For genomic regions, #matchVariantsBy=allele and #matchVariantsBy=position produce -the same result.

Annotate with Nirvana

Let's use the same VCF file as our previous example.

Investigate the Results

    {
"chromosome": "16",
"position": 23603511,
"refAllele": "TG",
"altAlleles": [
"T"
],
"cytogeneticBand": "16p12.2",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0,
"annotationOverlap": 0
}
],
"variants": [

Here's the full JSON file.

Reciprocal & Annotation Overlap

For all intervals, Nirvana internally calculates two overlaps: a variant overlap and an annotation overlap. Variant overlap is the percentage of the variant's length that is -overlapped. Annotation overlap is the percentage of the annotation's length that is overlapped.

Reciprocal overlap is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is 1 bp, both the reciprocal overlap and the annotation overlap will be pretty close to 0.

We will also see this annotation for the other variant on chr16:

    {
"chromosome": "16",
"position": 68801894,
"refAllele": "G",
"altAlleles": [
"A"
],
"cytogeneticBand": "16q22.1",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0,
"annotationOverlap": 0
}
],
"variants": [

Genomic Regions for Structural Variants Example

Create the Custom Annotation TSV

Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To force Nirvana to match regions only to other SVs, use the #matchVariantsBy=sv option in the header. Here is an example:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=sv
#CHROMPOSREFENDnotes
#categories....
#descriptions....
#type...string
chr1620000000T70000000Lots of false positives in this region

Here's the full TSV file.

Let's go over what's new in this example:

  • The main difference is the header field #matchVariantsBy=sv which indicates that only structural variants that overlap these genomic regions will receive annotations.

Annotate with Nirvana

Let's use a new VCF file. It contains the first variant from the previous file and a structural variant deletion, both of which overlap the given genomic region.

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 23603511 . TG T . . .
16 68801894 . G <DEL> . . END=73683789;SVTYPE=DEL

Here's the full VCF file.

Investigate the Results

Note that this time, MyDataSource only showed up for the <DEL> and not the deletion 16-23603511-TG-T.

    {
"chromosome": "16",
"position": 23603511,
"refAllele": "TG",
"altAlleles": [
"T"
],
"cytogeneticBand": "16p12.2",
"variants": [
...
...
{
"chromosome": "16",
"position": 68801894,
"svEnd": 73683789,
"refAllele": "G",
"altAlleles": [
"<DEL>"
],
"cytogeneticBand": "16q22.1-q22.3",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0.02396,
"annotationOverlap": 0.02396
}
],
"variants": [

Mixing Small Variants and Genomic Regions

Create the Custom Annotation TSV

Previously we looked at examples that either had small variants or genomic regions. Let's create a file that contains both:

Col 1Col 2Col 3Col 4Col 5Col 6
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTENDnotes
#categories.....
#descriptions.....
#type....string
chr1623603511TGAT..
chr1668801894GA..
chr1911107436GA..
chr2110510818C.10699435Interval #1
chr2110510818C<DEL>10699435Interval #2
chr2212370388TT[chr22:12370729[.Known false-positive

Here's the full TSV file.

Let's go over what's new in this example:

  • Column 4 now has the ALT field. Except for the case listed below, this is only used by small variants or translocation breakends.
  • Column 5 now has the END field. This is only used by genomic regions.
  • There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? Interval #2 has a symbolic allele in the ALT column. When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). When Nirvana matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.

Annotate with Nirvana

Let's use a new VCF file to study how matching works for intervals #1 and #2:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
21 10510818 . C <DUP> . . END=10699435;SVTYPE=DUP
22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND

Here's the full VCF file.

The first variant is similar to the custom annotation labelled "interval #2". Position 10510818 is the padding base, so it effectively starts at position 10510819.

Investigate the Results

  "positions": [
{
"chromosome": "21",
"position": 10510818,
"svEnd": 10699435,
"refAllele": "C",
"altAlleles": [
"<DUP>"
],
"cytogeneticBand": "21p11.2",
"MyDataSource": [
{
"start": 10510818,
"end": 10699435,
"notes": "Interval #1",
"reciprocalOverlap": 0.99999,
"annotationOverlap": 0.99999
},
{
"start": 10510819,
"end": 10699435,
"notes": "Interval #2",
"reciprocalOverlap": 1,
"annotationOverlap": 1
}
],

Here's the full JSON file.

As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. Interval #1 technically starts 1 bp earlier, so its overlap is 99.999%.

Further down the JSON file, we find the annotated translocation breakend:

      "variants": [
{
"vid": "22-12370388-T-T[chr22:12370729[",
"chromosome": "22",
"begin": 12370388,
"end": 12370388,
"isStructuralVariant": true,
"refAllele": "T",
"altAllele": "T[chr22:12370729[",
"variantType": "translocation_breakend",
"MyDataSource": {
"refAllele": "T",
"altAllele": "T[chr22:12370729[",
"notes": "Known false-positive"
}
}

Gene File Format

Basic Gene Example

Create the Custom Annotation TSV

Previously we looked at examples that either had small variants or genomic regions, however, sometimes we would like to add custom gene annotations. The gene custom annotation file format -looks slightly different:

Col 1Col 2Col 3Col 4
#title=MyDataSource
#geneSymbolgeneIdphenotypenotes
#categories...
#descriptions...
#type.stringstring
TP537157Colorectal cancer, hereditary nonpolyposis, type 5.
KRASENSG00000133703Mismatch repair cancer syndromeSeen in cohort 123

Here's the full TSV file.

Let's go over what's in this example:

  • Column 2 has the geneId field. This can be either an Entrez Gene ID or an Ensembl ID.
Gene Symbols

Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Nirvana uses the geneId to match genes rather than the gene symbol. However, to -make the custom annotation files easier to read, we've included the geneSymbol column as well.

Unknown Gene IDs

When Nirvana parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Nirvana. In such a case, Nirvana will display an error showing all the -unrecognized gene IDs.

Annotate with Nirvana

Let's use a VCF file that contains variants in TP53 and KRAS:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
12 25227255 . A T . . .
17 7675074 . C A . . .

Here's the full VCF file.

Investigate the Results

  "genes": [
{
"name": "KRAS",
"clingenGeneValidity": [
{
"diseaseId": "MONDO_0009026",
"disease": "Costello syndrome",
"classification": "disputed",
"classificationDate": "2018-07-24"
}
],
"clingenDosageSensitivityMap": {
"haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"
},
"gnomAD": {
"pLi": 0.000788,
"pRec": 0.789,
"pNull": 0.21,
"synZ": 0.336,
"misZ": 2.32,
"loeuf": 1.24
},
"MyDataSource": {
"phenotype": "Mismatch repair cancer syndrome",
"notes": "Seen in cohort 123"
}
},

This is the abbreviated output for KRAS. Here's the full JSON file if you want to see the complete KRAS entry.

Customizing the Header

Title

For the title, you can provide any string that hasn't already been used. The title should be unique.

caution

Make sure that the title does not conflict with other keys in the JSON file.

For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be -vid, chromosome, transcripts, etc. The title should also not conflict with other data source keys like clinvar or gnomad.

For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be -chromosome, svLength, cytogeneticBand, etc. The title should also not conflict with other data source keys like clingen or dgv.

caution

Care should be taken not to annotate using multiple custom annotations that all use the same title.

Genome Assemblies

The following genome assemblies can be specified:

  • GRCh37
  • GRCh38

Matching Criteria

The matching criteria instruct Nirvana how to match a VCF variant to the custom annotation.

The following matching criteria can be specified:

  • allele - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like gnomAD
  • position - use this when you want positional matches. This is commonly used with disease phenotype data sources like ClinVar
  • sv - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline -copy number intervals along the genome.

Categories

Categories are not used by Nirvana, but are often used by downstream tools. Categories provide hints for how those tools should filter or display -the annotation data.

When a category is specified, Nirvana will provide additional validation for those fields. The following table describes each category:

CategoryDescriptionValidation
AlleleCountallele counts for a specific populationSee the supported populations below
AlleleNumberallele numbers for a specific populationSee the supported populations below
AlleleFrequencyallele frequencies for a specific populationSee the supported populations below
PredictionACMG-style pathogenicity classificationsbenign (B)
likely benign (LB)
VUS
likely pathogenic (LP)
pathogenic (P)
Filterfree text that signals downstream tools to add the column to the filterMax 20 characters
Descriptionfree-text descriptionMax 100 characters
Identifierany IDMax 50 characters
HomozygousCountcount of homozygous individuals for a specific populationSee the supported populations below
Scoreany score valueAny double-precision floating point number

Descriptions

Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations.

Populations

The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD.

Population CodeSuper-population CodeDescription
ACBAFRAfrican Caribbeans in Barbados
AFRAFRAfrican
ALLALLAll populations
AMRAMRAdmixed American
ASJAshkenazi Jewish
ASWAFRAmericans of African Ancestry in SW USA
BEBSASBengali from Bangladesh
CDXEASChinese Dai in Xishuangbanna, China
CEUEURUtah Residents (CEPH) with Northern and Western European Ancestry
CHBEASHan Chinese in Beijing, China
CHSEASSouthern Han Chinese
CLMAMRColombians from Medellin, Colombia
EASEASEast Asian
ESNAFREsan in Nigeria
EUREUREuropean
FINEURFinnish in Finland
GBREURBritish in England and Scotland
GIHSASGujarati Indian from Houston, Texas
GWDAFRGambian in Western Divisions in the Gambia
IBSEURIberian population in Spain
ITUSASIndian Telugu from the UK
JPTEASJapanese in Tokyo, Japan
KHVEASKinh in Ho Chi Minh City, Vietnam
LWKAFRLuhya in Webuye, Kenya
MAGAFRMandinka in the Gambia
MKKAFRMaasai in Kinyawa, Kenya
MSLAFRMende in Sierra Leone
MXLAMRMexican Ancestry from Los Angeles, USA
NFEEUREuropean (Non-Finnish)
OTHOTHOther
PELAMRPeruvians from Lima, Peru
PJLSASPunjabi from Lahore, Pakistan
PURAMRPuerto Ricans from Puerto Rico
SASSASSouth Asian
STUSASSri Lankan Tamil from the UK
TSIEURToscani in Italia
YRIAFRYoruba in Ibadan, Nigeria

Data Types

Each custom annotation can be one of the following data types:

  • bool - true or false
  • number - any integer or floating-point number
  • string - text
tip

For boolean variables, only keys with a true value will be output to the JSON object.

Using SAUtils

Nirvana includes a tool called SAUtils that converts various data sources into Nirvana's native binary format. The sub-commands customvar and customgene are used to specify a variant file or a gene file respectively.

Convert Variant File

dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i MyDataSource.tsv \
-o SupplementaryAnnotation
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input TSV path
  • the -o argument specifies the output directory

Convert Gene File

dotnet bin/Release/netcoreapp2.1/SAUtils.dll customgene \
--uga Nirvana_UGA.tsv \
-i MyDataSource.tsv \
-o SupplementaryAnnotation
  • the --uga argument specifies the Nirvana universal gene archive (UGA) path
  • the -i argument specifies the input TSV path
  • the -o argument specifies the output directory
Nirvana_UGA file

The Nirvana_UGA is not part of the official set of files retrieved using the Downloader utility. But it is available here.

- - - - \ No newline at end of file diff --git a/3.18/file-formats/nirvana-json-file-format/index.html b/3.18/file-formats/nirvana-json-file-format/index.html deleted file mode 100644 index 2fc531fa..00000000 --- a/3.18/file-formats/nirvana-json-file-format/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Nirvana JSON File Format | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Nirvana JSON File Format

Overview

Conventions

In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. As such, we have several conventions that are useful to know about:

  • With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display "isStructuralVariant":false a few million times when annotating a small variant VCF.
  • When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. Nirvana treats periods like empty or null strings and therefore will not output those entries.

JSON Layout

info

In general, each position corresponds to a row in the original VCF file.

For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section.

Parsing

info

We've put together a new section that discusses how to parse our JSON files easily using examples in a Python Jupyter notebook and a R version as well. In addition, we have information about how to quickly dump content from our JSON file using a tabix-like utility called JASIX.

{
"header":{
"annotator":"Nirvana 3.0.0-alpha.5+g6c52e247",
"creationTime":"2017-06-14 15:53:13",
"genomeAssembly":"GRCh37",
"dataSources":[
{
"name":"OMIM",
"version":"unknown",
"description":"An Online Catalog of Human Genes and Genetic Disorders",
"releaseDate":"2017-05-03"
},
{
"name":"VEP",
"version":"84",
"description":"BothRefSeqAndEnsembl",
"releaseDate":"2017-01-16"
},
{
"name":"ClinVar",
"version":"20170503",
"description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",
"releaseDate":"2017-05-03"
},
{
"name":"phyloP",
"version":"hg19",
"description":"46 way conservation score between humans and 45 other vertebrates",
"releaseDate":"2009-11-10"
}
],
"samples":[
"NA12878",
"NA12891",
"NA12892"
]
},
FieldTypeNotes
annotatorstringthe name of the annotator and the current version
creationTimestringyyyy-MM-dd hh:mm:ss
genomeAssemblystringsee possible values below
schemaVersionintegerincremented whenever the core structure of the JSON file introduces breaking changes
dataVersionstring
dataSourcesobject arraysee Data Source entry below
samplesstring arraythe order of these sample names will be used throughout the JSON file when enumerating samples

Data Source

FieldTypeNotes
namestring
versionstring
descriptionstringoptional description of the data source
releaseDatestringyyyy-MM-dd

Genome Assemblies

  • GRCh37
  • GRCh38
  • hg19
  • SARSCoV2

Positions

"positions":[
{
"chromosome":"chr2",
"position":48010488,
"repeatUnit":"GGCCCC",
"refRepeatCount":3,
"svEnd":48020488,
"refAllele":"G",
"altAlleles":[
"A",
"GT"
],
"quality":461,
"filters":[
"PASS"
],
"ciPos":[
-170,
170
],
"ciEnd":[
-175,
175
],
"svLength":1000,
"strandBias":1.23,
"jointSomaticNormalQuality":29,
"cytogeneticBand":"2p16.3",
FieldTypeVariant TypeNotes
chromosomestringallexactly as displayed in the vcf
positionintegerallexactly as displayed in the vcf (1-based notation). Range: 1 - 250 million
repeatUnitstringSTRprovided by ExpansionHunter
refRepeatCountintegerSTRprovided by ExpansionHunter
svEndintegerSV
refAllelestringallexactly as displayed in the vcf
altAllelesstring arrayallexactly as displayed in the vcf
qualityfloatallexactly as displayed in the vcf (Normally an integer, but some variant callers use floating point. Has been observed as high as 500k)
filtersstring arrayallexactly as displayed in the vcf
ciPosinteger arraySV
ciEndinteger arraySV
svLengthintegerSV
strandBiasfloatsmall variantprovided by GATK (from SB)
jointSomaticNormalQualityintegerSVprovided by the Manta variant caller (SOMATICSCORE)
cytogeneticBandstringalle.g. 17p13.1

ClinGen

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
FieldTypeNotes
clingenobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
variantTypestringAny of the sequence alterations defined here.
idstringIdentifier from the data source. Alternatively a VID
clinicalInterpretationstringsee possible values below
observedGainsintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
observedLossesintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
validatedboolean
phenotypesstring arrayDescription of the phenotype.
phenotypeIdsstring arrayDescription of the phenotype IDs.
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain
"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
FieldTypeNotes
clingenDosageSensitivityMapobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
haploinsufficiencystringsee possible values below
triplosensitivitystring(same as haploinsufficiency) 
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).
annotationOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap. Specified up to 5 decimal places (Not reported for Insertions).

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely

1000 Genomes (SV)

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.

gnomAD (SV)

"gnomAD-preview": [
{
"chromosome": "1",
"begin": 40001,
"end": 47200,
"variantId": "gnomAD-SV_v2.1_DUP_1_1",
"variantType": "duplication",
"failedFilter": true,
"allAf": 0.068963,
"afrAf": 0.135694,
"amrAf": 0.022876,
"easAf": 0.01101,
"eurAf": 0.007846,
"othAf": 0.017544,
"femaleAf": 0.065288,
"maleAf": 0.07255,
"allAc": 943,
"afrAc": 866,
"amrAc": 21,
"easAc": 17,
"eurAc": 37,
"othAc": 2,
"femaleAc": 442,
"maleAc": 499,
"allAn": 13674,
"afrAn": 6382,
"amrAn": 918,
"easAn": 1544,
"eurAn": 4716,
"othAn": 114,
"femaleAn": 6770,
"maleAn": 6878,
"allHc": 91,
"afrHc": 90,
"amrHc": 1,
"easHc": 0,
"eurHc": 0,
"othHc": 55,
"femaleHc": 44,
"maleHc": 47,
"reciprocalOverlap": 0.01839,
"annotationOverlap": 0.16667
}
]

FieldTypeNotes
chromosomestringchromosome number
beginintegerposition interval start
endintegerposition interval end
variantTypestringstructural variant type
variantIdstringgnomAD ID
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
othAffloating pointallele frequency for all other populations. Range: 0 - 1.0
femaleAffloating pointallele frequency for female population. Range: 0 - 1.0
maleAffloating pointallele frequency for male population. Range: 0 - 1.0
allAcintegerallele count for all populations.
afrAcintegerallele count for the African super population.
amrAcintegerallele count for the Ad Mixed American super population.
easAcintegerallele count for the East Asian super population.
eurAcintegerallele count for the European super population.
othAcintegerallele count for all other populations.
maleAcintegerallele count for male population.
femaleAcintegerallele count for female population.
allAnintegerallele number for all populations.
afrAnintegerallele number for the African super population.
amrAnintegerallele number for the Ad Mixed American super population.
easAnintegerallele number for the East Asian super population.
eurAnintegerallele number for the European super population.
othAnintegerallele number for all other populations.
femaleAnintegerallele number for female population.
maleAnintegerallele number for male population.
allHcintegercount of homozygous individuals for all populations.
afrHcintegercount of homozygous individuals for the African / African American population.
amrHcintegercount of homozygous individuals for the Latino population.
easHcintegercount of homozygous individuals for the East Asian population.
eurHcintegercount of homozygous individuals for the European super population.
othHcintegercount of homozygous individuals for all other populations.
maleHcintegercount of homozygous individuals for male population.
femaleHcintegercount of homozygous individuals for female population.
failedFilterbooleanTrue if this variant failed any filters (Note: we do not list the failed filters)
reciprocalOverlapfloating pointReciprocal overlap. Range: 0 - 1.0
annotationOverlapfloating pointAnnotation overlap. Range: 0 - 1.0

Note: Following fields are not available in GRCh38 because the source file does not contain this information:

Field
femaleAf
maleAf
maleAc
femaleAc
femaleAn
maleAn
allHc
afrHc
amrHc
easHc
eurHc
othHc
maleHc
femaleHc
failedFilter

MITOMAP (SV)

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places

Samples

"samples":[
{
"genotype":"0/1",
"variantFrequencies":[
0.333,
0.5
],
"totalDepth":57,
"genotypeQuality":12,
"copyNumber":3,
"repeatUnitCounts":[
10,
20
],
"alleleDepths":[
10,
20,
30
],
"failedFilter":true,
"splitReadCounts":[
10,
20
],
"pairedEndReadCounts":[
10,
20
],
"isDeNovo":true,
"diseaseAffectedStatuses":[
"-"
],
"artifactAdjustedQualityScore":89.3,
"likelihoodRatioQualityScore":78.2,
"heteroplasmyPercentile":[
23.13,
12.65
]
}
]
FieldTypeVCFNotes
genotypestringGT
variantFrequenciesfloat arrayVF, ADrange: 0 - 1.0. One value per alternate allele
totalDepthintegerDPnon-negative integer values
genotypeQualityintegerGQnon-negative integer values. Typically maxes out at 99
copyNumberintegerCNnon-negative integer values
minorHaplotypeCopyNumberintegerMCNnon-negative integer values
repeatUnitCountsinteger arrayREPCNExpansionHunter-specific
alleleDepthsinteger arrayADnon-negative integer values
failedFilterboolFT
splitReadCountsinteger arraySRManta-specific
pairedEndReadCountsinteger arrayPRManta-specific
isDeNovoboolDN
deNovoQualityfloatDQ
diseaseAffectedStatusesstring arrayDSTExpansionHunter-specific
artifactAdjustedQualityScorefloatAQPEPE-specific. Range: 0 - 100.0
likelihoodRatioQualityScorefloatLQPEPE-specific. Range: 0 - 100.0
lossOfHeterozygosityboolCN, MCN
somaticQualityfloatSQ
heteroplasmyPercentilefloat arrayVFrange: 0 - 100. 2 decimal places. One value per alternate allele
binCountintegerBCnon-negative integer values
Empty Samples

If a sample does not contain any entries, we will create a sample object that contains the isEmpty key. This ensures that sample ordering is preserved while indicating that a sample is intentionally empty.

"samples":[
{
"isEmpty":true
}
],

Variants

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"isReferenceMinorAllele":true,
"isStructuralVariant":true,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"isDecomposedVariant":true,
"isRecomposedVariant":true,
"linkedVids":["2:48010488:GTA:ATC"],
"hgvsg":"NC_000002.11:g.48010488G>A",
"phylopScore":0.459
FieldTypeNotes
vidstringsee Variant Identifiers
chromosomestring
beginint1-based non-negative integer values. Range: 1 - 250 million
endint1-based non-negative integer values. Range: 1 - 250 million
isReferenceMinorAllelebooltrue when this is a reference minor allele
isStructuralVariantbooltrue when the variant is a structural variant
inLowComplexityRegionbooltrue when the variant lies in a low complexity region (gnomAD low complexity regions)
refAllelestringparsimonious representation of the reference allele
altAllelestringparsimonious representation of the alternate allele.
variantTypestringuses Sequence Ontology sequence alterations
isDecomposedVariantbooltrue when the decomposed variant has been used to create another recomposed variant
isRecomposedVariantbooltrue when the variant is recomposed from two or more decomposed variants
linkedVidsstring arraylist of VIDs for variants connecting decomposed and recomposed variants
hgvsgstringHGVS g. notation
phylopScorefloatphyloP conservation score. Range: -14.08 to 6.424
Reference Minor Alleles

Nirvana supports annotating reference minor alleles. In such a case, refAllele will be replaced by the global major allele and altAllele will be replaced with the original reference allele.

Flagging Decomposed & Recomposed Variants

When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with "isDecomposedVariant":true.

Similarly, the recomposed variant will be shown as a new VCF position. This recomposed variant will be flagged with "isRecomposedVariant":true.

Transcripts

"transcripts":[
{
"transcript":"ENST00000445503.1",
"source":"Ensembl",
"bioType":"nonsense_mediated_decay",
"codons":"gGg/gAg",
"aminoAcids":"G/E",
"cdnaPos":"268",
"cdsPos":"116",
"exons":"1/9",
"introns":"1/8",
"proteinPos":"39",
"geneId":"ENSG00000116062",
"hgnc":"MSH6",
"consequence":[
"missense_variant",
"NMD_transcript_variant"
],
"hgvsc":"ENST00000445503.1:c.116G>A",
"hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",
"geneFusion":{
"exon":6,
"intron":5,
"fusions":[
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",
"exon":3,
"intron":2
},
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",
"exon":2,
"intron":1
}
]
},
"isCanonical":true,
"polyPhenScore":0.95,
"polyPhenPrediction":"probably damaging",
"proteinId":"ENSP00000405294.1",
"siftScore":0.61,
"siftPrediction":"tolerated",
"completeOverlap":true
}
]
FieldTypeNotes
transcriptstringtranscript ID. e.g. ENST00000445503.1
sourcestringRefSeq / Ensembl
bioTypestringdescriptions of the biotypes from Ensembl
codonsstring
aminoAcidsstring
cdnaPosstring
cdsPosstring
exonsstringexons affected by the variant
intronsstringintrons affected by the variant
proteinPosstring
geneIdstringgene ID. e.g. ENSG00000116062
hgncstringgene symbol. e.g. MSH6
consequencestring arraySequence Ontology Consequences
hgvscstringHGVS coding nomenclature
hgvspstringHGVS protein nomenclature
geneFusionobjectsee Gene Fusions entry below
isCanonicalbooltrue when this is a canonical transcript
polyPhenScorefloatrange: 0 - 1.0
polyPhenPredictionstringsee possible values below
proteinIdstringprotein ID. E.g. ENSP00000405294.1
siftScorefloatrange: 0 - 1.0
siftPredictionstringsee possible values below
completeOverlapbooltrue when this transcript is completely overlapped by the variant

PolyPhen

  • probably damaging
  • possibly damaging
  • benign
  • unknown

SIFT

  • tolerated
  • deleterious
  • tolerated - low confidence
  • deleterious - low confidence

Amino Acid Conservation

"aminoAcidConservation": {
"scores": [0.34]
}
FieldTypeNotes
aminoAcidConservationobject
scoresobject array of doublespercent conserved with respect to human amino acid residue. Range: 0.01 - 1.00

Gene Fusions

FieldTypeNotes
exonintactual exon where the breakpoint was located
intronintactual intron where the breakpoint was located
fusionsobject arraysee Fusion entry below

Fusion

FieldTypeNotes
exonintactual exon where the other breakpoint was located
intronintactual intron where the other breakpoint was located
hgvscstringHGVS coding nomenclature describing the two genes and the transcripts that are fused along with their coding positions

Regulatory Regions

"regulatoryRegions":[
{
"id":"ENSR00001542175",
"type":"promoter",
"consequence":[
"regulatory_region_variant"
]
}
]
FieldTypeNotes
idstring
typestringsee possible values below
consequencestring arraysee possible values below

Regulatory Types

  • CTCF_binding_site
  • enhancer
  • open_chromatin_region
  • promoter
  • promoter_flanking_region
  • TF_binding_site

Regulatory Consequences

  • regulatory_region_variant
  • regulatory_region_ablation
  • regulatory_region_amplification
  • regulatory_region_truncation

ClinVar

small variants:

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]

large variants:

"clinvar":[
{
"chromosome":"1",
"begin":629025,
"end":8537745,
"variantType":"copy_number_loss",
"id":"RCV000051993.4",
"variationId":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"alleleOrigins":[
"not provided"
],
"phenotypes":[
"See cases"
],
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21",
"pubMedIds":[
"21844811"
]
},
{
"id":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21"
},
......
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
variantTypestringvariant type
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity

1000 Genomes

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.

DANN

"dannScore": 0.27
FieldTypeNotes
dannScorefloatRange: 0 - 1.0

dbSNP

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs

DECIPHER

"decipher":[
{
"chromosome":"1",
"begin":13516,
"end":91073,
"numDeletions":27,
"deletionFrequency":0.675,
"numDuplications":27,
"duplicationFrequency":0.675,
"sampleSize":40,
"reciprocalOverlap": 0.27555,
"annotationOverlap": 0.5901
}
],
FieldTypeNotes
chromosomestringEnsembl-style chromosome names
beginint1-based position
endint1-based position
numDeletionsint# of observed deletions
deletionFrequencyfloatdeletion frequency
numDuplicationsint# of observed duplications
duplicationFrequencyfloatduplication frequency
sampleSizeinttotal # of samples
reciprocalOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap
annotationOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap

GERP

"gerpScore": 1.27
FieldTypeNotes
gerpScorefloatRange: -∞ to +∞

GME Variome

"gmeVariome":{
"allAc":10,
"allAn":202,
"allAf":0.049504,
"failedFilter":true
}
FieldTypeNotes
allAcintGME allele count
allAnintGME allele number
allAffloatGME allele frequency
failedFilterboolTrue if this variant failed any filters

gnomAD

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.

MITOMAP

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele

Primate AI

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0

REVEL

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0

Splice AI

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place

TOPMed

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters

Genes

Nirvana reports gene annotations for all genes that have an overlapping variant with the exception of flanking variants (i.e. variants that only cause upstream_gene_variant or downstream_gene_variant).

"genes":[
{
"name":"MSH6",
"hgncId":7329,
"summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",
/* this is where gene-level data sources can be found e.g. OMIM */
}
]
FieldTypeNotes
namestringHGNC gene symbol
hgncIdintHGNC ID
summarystringshort description of the gene from OMIM

OMIM

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping

gnomAD LoF Gene Metrics

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)

ClinGen Disease Validity

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
FieldTypeNotes
clingenGeneValidityobject
diseaseIdstringMonarch Disease Ontology ID (MONDO)
diseasestringdisease label
classificationstringsee below for possible values
classificationDatestringyyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
  • no known disease relationship
- - - - \ No newline at end of file diff --git a/3.18/index.html b/3.18/index.html deleted file mode 100644 index 1627db6a..00000000 --- a/3.18/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Introduction | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs (including CNVs)). It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation.

The inputs to Nirvana are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Nirvana handles multiple alternate alleles and multiple samples with ease.

The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily.

Fun Fact

Nirvana is a backronym for NImble and Robust VAriant aNnotAtor

What does Nirvana annotate?

We use Sequence Ontology consequences to describe how each variant impacts a given transcript:

In addition, we also use external data sources to provide additional context for each variant:

Licensing

Code

Nirvana source code is provided under the GPLv3 license. Nirvana includes several third-party packages provided under other open source licenses; please see Dependencies for additional details.

Data

The data used by Nirvana is publicly available; however, some data sources have special restrictions on use by non-academic entities.

Nirvana Team

Active Team

The Nirvana team works on the core functionality and AWS annotation services, in addition to keeping the annotation data sources up-to-date.

Current members of the Nirvana team are listed in alphabetical order below.

Fahd Siddiqui

Joined our team back in December 2021 and brings even more cloud and ML experience to our team.

Joseph Platzer

Test Lead. Joins Nirvana with a history of building sequencing tools and keeping the customer first.

Michael Strömberg

Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it.

Ningxin Ouyang

Our newest addition to the team with a wealth of experience in transcription factor footprinting.

Rajat Shuvro Roy

Lead developer. Loves to speed up things and make services available to all interested users.

Honorary Alumni

Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things.

Haochen Li

Detail-oriented quick thinker that keeps cool even in the most stressful situations. Now working as a Senior Bioinformatics Data Scientist at GRAIL.

Julien Lajugie

Julien is a legend around these parts. When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place.

Shuli Kang

Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies.

Yu Jiang

Biostatistics genius from Duke University before joining our team at Illumina. Now working as a Research Engineer at Facebook AI Research.
- - - - \ No newline at end of file diff --git a/3.18/introduction/covid19/index.html b/3.18/introduction/covid19/index.html deleted file mode 100644 index 267d84bc..00000000 --- a/3.18/introduction/covid19/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Annotating COVID-19 | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Annotating COVID-19

The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.

However, nothing in our architecture prevents us from supporting other genomes. Earlier this year, we had an opportunity to put that statement to the test - we added support for annotating the SARS-CoV-2 genome, the virus that causes the COVID-19 disease.

In addition to normal transcript annotation, we also supply:

  • allele frequencies
  • protein domains
SARS-CoV-2 Galaxy Project

The allele frequencies used by Nirvana were provided by the SARS-CoV-2 Galaxy Project. This is an international effort that provides ongoing analysis of COVID-19 using Galaxy, BioConda, and public research infrastructures.

Getting Nirvana

If you don't have Nirvana already, please consult our Getting Started page first.

Downloading the COVID-19 data files

Here's a data zip file containing new gene models, reference, and external data sources for SARS-CoV-2:

Just go to the directory that contains your Nirvana Data directory.

cd ~/Nirvana
curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip
unzip Covid19Data.zip

Download a COVID-19 VCF file

Here's a COVID-19 VCF file you can play around with:

curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz

Running Nirvana

Once you have downloaded the data sets, use the following command to annotate your VCF:

dotnet bin/Release/netcoreapp2.1/Nirvana.dll \
-c Data/Cache/SARS-CoV-2/SARS-CoV-2 \
--sd Data/SupplementaryAnnotation/SARS-CoV-2 \
-r Data/References/SARS-CoV-2.ASM985889v3.dat \
-i Covid19Mutations.vcf.gz \
-o Covid19Mutations
  • the -c argument specifies the cache prefix
  • the --sd argument specifies the supplementary annotation directory
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input VCF path
  • the -o argument specifies the output filename prefix

When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:

---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:00.0
SA Position Scan 00:00:00.0 1763

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
NC_045512 00:00:00.0 00:00:00.1 173

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:00.0 2.0 %
Preload 00:00:00.0 0.3 %
Annotation 00:00:00.1 6.0 %

Time: 00:00:01.5

The output will be a JSON file called Covid19Mutations.json.gz. Here's the full JSON file.

Investigating the Results

Here's an example of what a COVID-19 variant looks like in the JSON output:

{
"chromosome":"NC_045512.2",
"position":27323,
"refAllele":"C",
"altAlleles":[
"T"
],
"filters":[
"PASS"
],
"proteinDomains":[
{
"start":27202,
"end":27384,
"proteinId":"YP_009724394.1",
"domainId":"cl13556",
"domainName":"Sars6 super family",
"reciprocalOverlap":0.00546,
"annotationOverlap":0.00546
}
],
"variants":[
{
"vid":"NC_045512.2-27323-C-T",
"chromosome":"NC_045512.2",
"begin":27323,
"end":27323,
"refAllele":"C",
"altAllele":"T",
"variantType":"SNV",
"hgvsg":"NC_045512.2:g.27323C>T",
"alleleFrequency":{
"refAllele":"C",
"altAllele":"T",
"allAc":8,
"allAn":1058,
"allAf":0.007561
},
"transcripts":[
{
"transcript":"YP_009724394.1",
"source":"RefSeq",
"bioType":"protein_coding",
"codons":"tCt/tTt",
"aminoAcids":"S/F",
"cdnaPos":"122",
"cdsPos":"122",
"exons":"1/1",
"proteinPos":"41",
"geneId":"43740572",
"hgnc":"ORF6",
"consequence":[
"missense_variant"
],
"hgvsc":"YP_009724394.1:c.122C>T",
"hgvsp":"YP_009724394.1:p.(Ser41Phe)",
"proteinId":"YP_009724394.1"
},
{
"transcript":"YP_009724395.1",
"source":"RefSeq",
"bioType":"protein_coding",
"geneId":"43740573",
"hgnc":"ORF7a",
"consequence":[
"upstream_gene_variant"
],
"proteinId":"YP_009724395.1"
}
]
}
]
}
- - - - \ No newline at end of file diff --git a/3.18/introduction/dependencies/index.html b/3.18/introduction/dependencies/index.html deleted file mode 100644 index 49cc67c8..00000000 --- a/3.18/introduction/dependencies/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Dependencies | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Dependencies

All of the following dependencies have been included in this repository.

NameLicenseUsage
Amazon.LambdaApacheAWS extensions for .NET CLI
AWSSDKApacheAWS Lambda, S3, SNS support
Json.NETMITJASIX utility
libdeflateMITBlockCompression library
MoqBSDMocking framework for unit tests
NDesk.OptionsMIT/X11CommandLine library
xUnitApacheUnit testing framework
zlib-ngzlibBlockCompression library
zstdBSDBlockCompression library
- - - - \ No newline at end of file diff --git a/3.18/introduction/getting-started/index.html b/3.18/introduction/getting-started/index.html deleted file mode 100644 index 81e846cf..00000000 --- a/3.18/introduction/getting-started/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Getting Started | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Getting Started

Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.

tip

Nirvana currently uses .NET Core 3.1 or later. Please make sure that you have the most current runtime from the .NET Core downloads page.

Quick Start

If you want to get started right away, we've created a script that downloads Nirvana, compiles it, and starts annotating a test file:

curl -O https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh
bash ./TestNirvana.sh

We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X.

Getting Nirvana

Compile from Source

The following will grab the latest version of Nirvana from GitHub and compile it using the .NET Core compiler:

git clone https://github.com/Illumina/Nirvana.git
cd Nirvana
dotnet build -c Release

GitHub Release Notes

Alternatively, you can grab the latest binaries from our GitHub Releases page:

mkdir -p Nirvana/Data
cd Nirvana
unzip Nirvana-3.16.1-dotnet-3.1.0.zip

Docker

You can find us on Docker Hub under annotation/nirvana:

caution

We think Docker is fantastic. However, because our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Nirvana in Docker.

mkdir -p Nirvana/Data
cd Nirvana
docker pull annotation/nirvana:3.14

For Docker, we have special instructions for running the Downloader:

sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \
/opt/nirvana/Downloader.dll --ga GRCh37 -o /scratch

Similarly, we have special instructions for running Nirvana (Here's a toy VCF in case you need it):

sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \
/opt/nirvana/Nirvana.dll -c /scratch/Cache/GRCh37/Both \
-r /scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \
--sd /scratch/SupplementaryAnnotation/GRCh37 \
-i /scratch/HiSeq.10000.vcf.gz -o /scratch/HiSeq

Downloading the data files

To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:

dotnet bin/Release/netcoreapp3.1/Downloader.dll \
--ga GRCh37 \
-o Data
  • the --ga argument specifies the genome assembly which can be GRCh37, GRCh38, or both.
  • the -o argument specifies the output directory
Glitches in the Matrix

Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. If you see that a file was marked truncated, try fixing the root cause and running the downloader again.

tip

From time to time, you can re-run the Downloader to get the latest annotation files. It will only download the files that changed.

Download a test VCF file

Here's a toy VCF file you can play around with:

curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz

Running Nirvana

Once you have downloaded the data sets, use the following command to annotate your VCF:

dotnet bin/Release/netcoreapp3.1/Nirvana.dll \
-c Data/Cache/GRCh37/Both \
--sd Data/SupplementaryAnnotation/GRCh37 \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i HiSeq.10000.vcf.gz \
-o HiSeq.10000
  • the -c argument specifies the cache prefix
  • the --sd argument specifies the supplementary annotation directory
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input VCF path
  • the -o argument specifies the output filename prefix

When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:

---------------------------------------------------------------------------
Nirvana (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.16.1
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:01.2
SA Position Scan 00:00:00.1 55,270

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
chr1 00:00:00.1 00:00:01.5 6,323

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:01.3 23.9 %
Preload 00:00:00.1 2.9 %
Annotation 00:00:01.5 27.2 %

Peak memory usage: 1.434 GB
Time: 00:00:05.2

The output will be a JSON file called HiSeq.10000.json.gz. Here's the full JSON file.

- - - - \ No newline at end of file diff --git a/3.18/introduction/parsing-json/index.html b/3.18/introduction/parsing-json/index.html deleted file mode 100644 index a43d819f..00000000 --- a/3.18/introduction/parsing-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Parsing Nirvana JSON | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Parsing Nirvana JSON

Why JSON?

VCF is a fantastic file format that was developed during the methods development activities within the 1000 Genomes Project. Prior to that, variant callers were outputting information into a variety of tab-delimited formats. Some were based on existing standards (like GFF), while most were proprietary. The primary intent of VCF files was to provide a human-readable, standardized representation of genetic variants. Similar to SAM/BAM files, VCF files used BCF files as their binary counterpart.

In the very beginning, Nirvana offered VCF output for annotation. While many variant annotators offer an option to output VCF files, one could argue whether they are still human-readable. Here's an example from a VCF file produced by VEP v102:

chr3    107840527   .   A   ATTTTTTTTT,AT,ATTTTTTTT 153.51  PASS    AN=6;MQ=244.10;
SOR=1.739;QD=2.24;DP=57;AF=0.500,0.167,0.333;FS=0.000;AC=3,1,2;CSQ=TTTTTTTTT|
intron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|
Transcript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-132_622-124dup|||||||
rs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||
|||||||||0.792|-0.109757,T|intron_variant&non_coding_transcript_variant|MODIFIER|
LINC00635|ENSG00000241469|Transcript|ENST00000608506.6|lncRNA||4/4|
ENST00000608506.6:n.622-124dup|||||||rs35564779||-1||HGNC|HGNC:27184|||5|||||||||
Ensembl||||||||||||||||||||||||||||||||||||||||||||0.932|-0.075622,TTTTTTTT|
intron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|
Transcript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-131_622-124dup|||||||
rs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||
|||||||||0.808|-0.105490,TTTTTTTTT|intron_variant&non_coding_transcript_variant|
MODIFIER|LINC00636|ENSG00000240423|Transcript|ENST00000649048.1|lncRNA||2/3|
ENST00000649048.1:n.179+5223_179+5231dup|||||||rs35564779||1||HGNC|HGNC:27702|||||||||
|||Ensembl||||||||||||||||||||||||||||||||||||||||||||0.792|-0.109757, (etc.)

Originally Nirvana used the same VCF notation as VEP uses above. The problem is that you end up with a large amount of text that is difficult to parse out by eye and requires the use of several delimiters to divide the information into useful segments. When we originally annotated this variant using VEP, this single variant used 488,909 bytes (almost ½ MB). Surprisingly, we found that this broke some downstream tools that had preconceived notions of how long a single line could be in a VCF file.

caution

Whitespace is not allowed in the VCF INFO field. This means that if you wanted to express a gene description from OMIM: "HRAS PROTOONCOGENE, GTPase; HRAS", you would need to replace the spaces with something else like an underscore. You would also need to hope that the VCF parser correctly handles embedded commas and semicolons in the description.

What do other annotators use?

Unfortunately, file format standardization has not made it all the way to variant annotation yet. The GA4GH Annotation group had many discussions on the topic several years ago. While a set of JSON schemas was created in that effort, there wasn't enough momentum to make this a new standard.

While there is some overlap in general file formats (JSON vs VCF vs TSV), none of those are compatible with each other. I.e. the VCF representation in VEP and snpEff is different just like the JSON schemas used by VEP, Nirvana, and GA4GH are different.

SourceFormats
VEPJSON, TSV, VCF
snpEffVCF
AnnovarTSV
NirvanaJSON
GA4GHJSON

We are interested in working together with others in the annotation space to develop a common annotation file format. Our belief is that this would accelerate methods development and benchmarking activities within annotation much in the same way the creation of SAM/BAM & VCF/BCF accelerated secondary analysis development.

What do we gain by using JSON?

  • JSON files are better at showing hierarchical and other relational data. For example, when we output ClinVar data, we often want to output several overlapping RCV entries (variants coupled with a disease phenotype). In each, we would want to output a list of phenotypes, clinical significance, etc. That is difficult to accomplish in a human-readable way using VCF files (without resorting to a growing lexicon of delimiters).
  • JSON files use JavaScript data types, while VCF INFO fields don't directly have data types. Instead, external metadata located in the VCF header is required to indicate the preferred data type.
  • JSON files are more verbose. Often this is seen as a negative, but compression largely compensates for this. Given the following excerpt from the VCF example above HGNC:27184|||5|||||||||Ensembl it's not immediately obvious what the 5 refers to (without checking the VCF header for details). With JSON files, you would always see a key name associated with a value.
  • JSON files can be natively imported into different search and analytics solutions like Elasticsearch and Snowflake.
  • JSON strings do not have any limitations on the use of whitespace.

Parsing JSON

Our JSON files are organized similarly to original VCF variants:

Nirvana JSON files can get very large and sometimes we receive feedback that a bioinformatician tried to read the JSON file into Python or R resulting in a program that ran out of available RAM. This happens because those parsers try to load everything into memory all at once.

To get around those issues, we play some clever tricks with newlines that enable our users to parse our JSON files quickly and efficiently.

Organization

Our JSON file is arranged as follows:

  • the header section is located on the first line
  • each line after that corresponds to a position (same as a row in a VCF file)
    • until you reach the genes section ],"genes":[
  • each line after that corresponds to a gene
    • until you reach the end ]}

Knowing this, you can load each position line as an independent JSON object and extract the information you need.

Jupyter Notebook

To demonstrate this, we have put together a Jupyter notebook demonstrating how to do this in Python and an R version as well.

JASIX

One of the tools that we really like in the VCF ecosystem is tabix. Unfortunately, tabix only works for tab-delimited file formats. As a result, we created a similar tool for Nirvana JSON files called JASIX.

Here's an example of how you might use JASIX:

dotnet bin/Release/netcoreapp2.1/Jasix.dll -i dragen.json.gz -q chr1:942450-942455
  • the -i argument specifies the Nirvana JSON path
  • the -q argument specifies a genomic range (you can use as many of these as you want)

JASIX also includes additional options for showing the Nirvana header or for extracting different sections (like the genes section).

The output from JASIX is a compliant JSON object shown in pretty-printed form:

{"positions":[
{
"chromosome": "chr1",
"position": 942451,
"refAllele": "T",
"altAlleles": [
"C"
],
"quality": 484.23,
"filters": [
"PASS"
],
"cytogeneticBand": "1p36.33",
"samples": [
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 21,
"genotypeQuality": 60,
"alleleDepths": [
0,
21
]
},
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 32,
"genotypeQuality": 93,
"alleleDepths": [
0,
32
]
},
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 36,
"genotypeQuality": 105,
"alleleDepths": [
0,
36
]
}
],
"variants": [
{
"vid": "1-942451-T-C",
"chromosome": "chr1",
"begin": 942451,
"end": 942451,
"refAllele": "T",
"altAllele": "C",
"variantType": "SNV",
"hgvsg": "NC_000001.11:g.942451T>C",
"phylopScore": -0.1,
"clinvar": [
{
"id": "VCV000836156.1",
"reviewStatus": "criteria provided, single submitter",
"significance": [
"uncertain significance"
],
"refAllele": "T",
"altAllele": "T",
"lastUpdatedDate": "2020-08-20"
},
{
"id": "RCV001037211.1",
"variationId": 836156,
"reviewStatus": "criteria provided, single submitter",
"alleleOrigins": [
"germline"
],
"refAllele": "T",
"altAllele": "T",
"phenotypes": [
"not provided"
],
"medGenIds": [
"CN517202"
],
"significance": [
"uncertain significance"
],
"lastUpdatedDate": "2020-08-20",
"pubMedIds": [
"28492532"
]
}
],
"dbsnp": [
"rs6672356"
],
"gnomad": {
"coverage": 25,
"allAf": 0.999855,
"allAn": 123742,
"allAc": 123724,
"allHc": 61853,
"afrAf": 0.999416,
"afrAn": 10278,
"afrAc": 10272,
"afrHc": 5133,
"amrAf": 0.99995,
"amrAn": 20008,
"amrAc": 20007,
"amrHc": 10003,
"easAf": 1,
"easAn": 6054,
"easAc": 6054,
"easHc": 3027,
"finAf": 1,
"finAn": 8696,
"finAc": 8696,
"finHc": 4348,
"nfeAf": 0.999899,
"nfeAn": 49590,
"nfeAc": 49585,
"nfeHc": 24790,
"asjAf": 1,
"asjAn": 7208,
"asjAc": 7208,
"asjHc": 3604,
"sasAf": 0.99967,
"sasAn": 18160,
"sasAc": 18154,
"sasHc": 9074,
"othAf": 1,
"othAn": 3748,
"othAc": 3748,
"othHc": 1874,
"maleAf": 0.9999,
"maleAn": 69780,
"maleAc": 69773,
"maleHc": 34883,
"femaleAf": 0.999796,
"femaleAn": 53962,
"femaleAc": 53951,
"femaleHc": 26970,
"controlsAllAf": 0.999815,
"controlsAllAn": 48654,
"controlsAllAc": 48645
},
"oneKg": {
"allAf": 1,
"afrAf": 1,
"amrAf": 1,
"easAf": 1,
"eurAf": 1,
"sasAf": 1,
"allAn": 5008,
"afrAn": 1322,
"amrAn": 694,
"easAn": 1008,
"eurAn": 1006,
"sasAn": 978,
"allAc": 5008,
"afrAc": 1322,
"amrAc": 694,
"easAc": 1008,
"eurAc": 1006,
"sasAc": 978
},
"primateAI": [
{
"hgnc": "SAMD11",
"scorePercentile": 0.87
}
],
"revel": {
"score": 0.145
},
"topmed": {
"allAf": 0.999809,
"allAn": 125568,
"allAc": 125544,
"allHc": 62760
},
"transcripts": [
{
"transcript": "ENST00000420190.6",
"source": "Ensembl",
"bioType": "protein_coding",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"downstream_gene_variant"
],
"proteinId": "ENSP00000411579.2"
},
{
"transcript": "ENST00000342066.7",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "1110",
"cdsPos": "1027",
"exons": "10/14",
"proteinPos": "343",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000342066.7:c.1027T>C",
"hgvsp": "ENSP00000342313.3:p.(Trp343Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000342313.3",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000618181.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "732",
"cdsPos": "652",
"exons": "7/11",
"proteinPos": "218",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618181.4:c.652T>C",
"hgvsp": "ENSP00000480870.1:p.(Trp218Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000480870.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000622503.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "1110",
"cdsPos": "1030",
"exons": "10/14",
"proteinPos": "344",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000622503.4:c.1030T>C",
"hgvsp": "ENSP00000482138.1:p.(Trp344Arg)",
"isCanonical": true,
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000482138.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000618323.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "cTg/cCg",
"aminoAcids": "L/P",
"cdnaPos": "712",
"cdsPos": "632",
"exons": "8/12",
"proteinPos": "211",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618323.4:c.632T>C",
"hgvsp": "ENSP00000480678.1:p.(Leu211Pro)",
"polyPhenScore": 0,
"polyPhenPrediction": "unknown",
"proteinId": "ENSP00000480678.1",
"siftScore": 0.03,
"siftPrediction": "deleterious - low confidence"
},
{
"transcript": "ENST00000616016.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "ccT/ccC",
"aminoAcids": "P",
"cdnaPos": "944",
"cdsPos": "864",
"exons": "9/13",
"proteinPos": "288",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"synonymous_variant"
],
"hgvsc": "ENST00000616016.4:c.864T>C",
"hgvsp": "ENST00000616016.4:c.864T>C(p.(Pro288=))",
"proteinId": "ENSP00000478421.1"
},
{
"transcript": "ENST00000618779.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "921",
"cdsPos": "841",
"exons": "9/13",
"proteinPos": "281",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618779.4:c.841T>C",
"hgvsp": "ENSP00000484256.1:p.(Trp281Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000484256.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000616125.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "783",
"cdsPos": "703",
"exons": "8/12",
"proteinPos": "235",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000616125.4:c.703T>C",
"hgvsp": "ENSP00000484643.1:p.(Trp235Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000484643.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000620200.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "cTg/cCg",
"aminoAcids": "L/P",
"cdnaPos": "427",
"cdsPos": "347",
"exons": "5/9",
"proteinPos": "116",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000620200.4:c.347T>C",
"hgvsp": "ENSP00000484820.1:p.(Leu116Pro)",
"polyPhenScore": 0,
"polyPhenPrediction": "unknown",
"proteinId": "ENSP00000484820.1",
"siftScore": 0.16,
"siftPrediction": "tolerated - low confidence"
},
{
"transcript": "ENST00000617307.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "867",
"cdsPos": "787",
"exons": "9/13",
"proteinPos": "263",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000617307.4:c.787T>C",
"hgvsp": "ENSP00000482090.1:p.(Trp263Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000482090.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "NM_152486.2",
"source": "RefSeq",
"bioType": "protein_coding",
"codons": "Cgg/Cgg",
"aminoAcids": "R",
"cdnaPos": "1107",
"cdsPos": "1027",
"exons": "10/14",
"proteinPos": "343",
"geneId": "148398",
"hgnc": "SAMD11",
"consequence": [
"synonymous_variant"
],
"hgvsc": "NM_152486.2:c.1027T>C",
"hgvsp": "NM_152486.2:c.1027T>C(p.(Arg343=))",
"isCanonical": true,
"proteinId": "NP_689699.2"
},
{
"transcript": "ENST00000341065.8",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "750",
"cdsPos": "751",
"exons": "8/12",
"proteinPos": "251",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000341065.8:c.750T>C",
"hgvsp": "ENSP00000349216.4:p.(Trp251Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000349216.4",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000455979.1",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "507",
"cdsPos": "508",
"exons": "4/7",
"proteinPos": "170",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000455979.1:c.507T>C",
"hgvsp": "ENSP00000412228.1:p.(Trp170Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000412228.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000478729.1",
"source": "Ensembl",
"bioType": "processed_transcript",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000474461.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "389",
"exons": "3/4",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000474461.1:n.389T>C"
},
{
"transcript": "ENST00000466827.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "191",
"exons": "2/2",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000466827.1:n.191T>C"
},
{
"transcript": "ENST00000464948.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "286",
"exons": "1/2",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000464948.1:n.286T>C"
},
{
"transcript": "NM_015658.3",
"source": "RefSeq",
"bioType": "protein_coding",
"geneId": "26155",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
],
"isCanonical": true,
"proteinId": "NP_056473.2"
},
{
"transcript": "ENST00000483767.5",
"source": "Ensembl",
"bioType": "retained_intron",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000327044.6",
"source": "Ensembl",
"bioType": "protein_coding",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
],
"isCanonical": true,
"proteinId": "ENSP00000317992.6"
},
{
"transcript": "ENST00000477976.5",
"source": "Ensembl",
"bioType": "retained_intron",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000496938.1",
"source": "Ensembl",
"bioType": "processed_transcript",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
}
]
}
]
}
]}
- - - - \ No newline at end of file diff --git a/3.18/utilities/jasix/index.html b/3.18/utilities/jasix/index.html deleted file mode 100644 index c08cce06..00000000 --- a/3.18/utilities/jasix/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Jasix | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

Jasix

Overview

The Jasix index aims to provide TABIX-like indexing capabilities for the Nirvana JSON output.

Creating the Jasix index

The Jasix index (which comes in a .jsi file) is generated on-the-fly with Nirvana output. It can also be generated independently by running the Jasix command line utility on the JSON output file. Please note that the Jasix utility can only consume JSON files that follow the Nirvana JSON output format. The following code blocks demonstrate the help menu and index-generating functionality of Jasix.

Example

dotnet Jasix.dll -h
USAGE: dotnet Jasix.dll -i in.json.gz [options]
Indexes a Nirvana annotated JSON file

OPTIONS:
--header, -t print also the header lines
--only-header, -H print only the header lines
--chromosomes, -l list chromosome names
--index, -c create index
--in, -i <VALUE> input
--out, -o <VALUE> compressed output file name (default:console)
--query, -q <VALUE> query range
--section, -s <VALUE> complete section (positions or genes) to output
--help, -h displays the help menu
--version, -v displays the version
dotnet Jasix.dll --index -i input.json.gz
---------------------------------------------------------------------------
Jasix (c) 2017 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 2.0.0
---------------------------------------------------------------------------

Ref Sequence chrM indexed in 00:00:00.2
Ref Sequence chr1 indexed in 00:00:05.8
Ref Sequence chr2 indexed in 00:00:06.0
.
.
.
Peak memory usage: 28.5 MB
Time: 00:01:14.8

Querying the index

The Jasix query format is chr:start-end. If end is not provided, it assumes end=start. If only chr is provided, all entries for that chromosome will be returned.

dotnet Jasix.dll -i input.json.gz chrM:5000-7000
{
"positions":[
{
"chromosome":"chrM",
"refAllele":"C",
"position":5581,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"T"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1625,
"genotypeQuality":1,
"alleleDepths":[
0,
1625
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"T",
"refAllele":"C",
"begin":5581,
"chromosome":"chrM",
"end":5581,
"variantType":"SNV",
"vid":"MT:5581:T"
}
]
},
{
"chromosome":"chrM",
"refAllele":"A",
"position":6267,
"quality":1637.00,
"filters":[
"LowGQXHetSNP"
],
"altAlleles":[
"G"
],
"samples":[
{
"variantFreq":0.6873,
"totalDepth":323,
"genotypeQuality":1,
"alleleDepths":[
101,
222
],
"genotype":"0/1"
}
],
"variants":[
{
"altAllele":"G",
"refAllele":"A",
"begin":6267,
"chromosome":"chrM",
"end":6267,
"variantType":"SNV",
"vid":"MT:6267:G"
}
]
}
]
}

The default output stream is Console. However, if an output filename is provided, Jasix outputs the results to that file in a bgzip compressed format. The output is always a valid JSON entry. If requested (via -t option) the header of the indexed file will be provided. Multiple queries can be submitted in the same command and the output will contain them within the same "positions" block in order of the submitted queries (Warning: if the queries are out of order, or overlapping, the output will be out of order and intersecting).

dotnet Jasix.dll -i input.json.gz  -q chrM:5000-7000 -q chrM:8500-9500 -t
{
"header":{
"annotator":"Illumina Annotation Engine 1.6.2.0",
"creationTime":"2017-08-30 11:42:57",
"genomeAssembly":"GRCh37",
"schemaVersion":6,
"dataVersion":"84.24.39",
"dataSources":[
{
"name":"VEP",
"version":"84",
"description":"Ensembl",
"releaseDate":"2017-01-16"
}
],
"samples":[
"Mother"
]
},
"positions":[
{
"chromosome":"chrM",
"refAllele":"C",
"position":5581,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"T"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1625,
"genotypeQuality":1,
"alleleDepths":[
0,
1625
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"T",
"refAllele":"C",
"begin":5581,
"chromosome":"chrM",
"end":5581,
"variantType":"SNV",
"vid":"MT:5581:T"
}
]
},
{
"chromosome":"chrM",
"refAllele":"A",
"position":6267,
"quality":1637.00,
"filters":[
"LowGQXHetSNP"
],
"altAlleles":[
"G"
],
"samples":[
{
"variantFreq":0.6873,
"totalDepth":323,
"genotypeQuality":1,
"alleleDepths":[
101,
222
],
"genotype":"0/1"
}
],
"variants":[
{
"altAllele":"G",
"refAllele":"A",
"begin":6267,
"chromosome":"chrM",
"end":6267,
"variantType":"SNV",
"vid":"MT:6267:G"
}
]
},
{
"chromosome":"chrM",
"refAllele":"G",
"position":8702,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"A"
],
"samples":[
{
"variantFreq":0.9987,
"totalDepth":1534,
"genotypeQuality":1,
"alleleDepths":[
2,
1532
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"A",
"refAllele":"G",
"begin":8702,
"chromosome":"chrM",
"end":8702,
"variantType":"SNV",
"vid":"MT:8702:A"
}
]
},
{
"chromosome":"chrM",
"refAllele":"G",
"position":9378,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"A"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1018,
"genotypeQuality":1,
"alleleDepths":[
0,
1018
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"A",
"refAllele":"G",
"begin":9378,
"chromosome":"chrM",
"end":9378,
"variantType":"SNV",
"vid":"MT:9378:A"
}
]
}
]
}

Extracting a section

The Nirvana JSON file has three sections: header, positions and genes. Header can be printed using the -H option. If you are interested in only the positions or genes section, you can use the -s or --section option.

dotnet Jasix.dll -i input.json.gz  -s genes
[
{
"name": "ABCB10",
"omim": [
{
"mimNumber": 605454,
"geneName": "ATP-binding cassette, subfamily B, member 10"
}
]
},
{
"name": "ABCD3",
"omim": [
{
"mimNumber": 170995,
"geneName": "ATP-binding cassette, subfamily D, member 3 (peroxisomal membrane protein 1, 70kD)",
"description": "The ABCD3 gene encodes a peroxisomal membrane transporter involved in the transport of branched-chain fatty acids and C27 bile acids into the peroxisome; the latter function is a crucial step in bile acid biosynthesis (summary by Ferdinandusse et al., 2015).",
"phenotypes": [
{
"mimNumber": 616278,
"phenotype": "?Bile acid synthesis defect, congenital, 5",
"mapping": "molecular basis of the disorder is known",
"inheritances": [
"Autosomal recessive"
],
"comments": [
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
}
]
- - - - \ No newline at end of file diff --git a/3.18/utilities/sautils/index.html b/3.18/utilities/sautils/index.html deleted file mode 100644 index c9b31873..00000000 --- a/3.18/utilities/sautils/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -SAUtils | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.18

SAUtils

Overview

SAUtils is a utility tool that creates binary supplementary annotation files (.nsa, .gsa, .npd, .nsi, etc.) from original data files (e.g. VCFs, TSVs, XML, HTML, etc.) for various data sources (e.g. ClinVar, dbSNP, gnomAD, etc.). These binary files can be fed into the Nirvana Annotation engine to provide supplementary annotations in the output.

The SAUtils Menu

SAUtils supports building binary files for many data sources. The help menu lists them out in the form of sub-commands.

dotnet Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.18.0
---------------------------------------------------------------------------

Utilities focused on supplementary annotation

USAGE: dotnet SAUtils.dll <command> [options]

COMMAND: AaCon create AA conservation database
ancestralAllele create Ancestral allele database from 1000Genomes data
ClinGen create ClinGen database
clinvar create ClinVar database
concat merge multiple NSA files for the same data source having non-overlapping regions
Cosmic create COSMIC database
CosmicSv create COSMIC SV database
CosmicFusion create COSMIC gene fusion database
CustomGene create custom gene annotation database
CustomVar create custom variant annotation database
Dann create DANN database
Dbsnp create dbSNP database
Dgv create DGV database
DiseaseValidity create disease validity database
DosageMapRegions create dosage map regions
DosageSensitivity create dosage sensitivity database
DownloadOmim download OMIM database
ExacScores create ExAC gene scores database
ExtractMiniSA extracts mini SA
ExtractMiniXml extracts mini XML (ClinVar)
FilterSpliceNetTsv filter SpliceNet predictions
FusionCatcher create FusionCatcher database
Gerp create GERP conservation database
GlobalMinor create global minor allele database
GME Variome create GME Variome database
Gnomad create gnomAD database
Gnomad-lcr create gnomAD low complexity region database
GnomadGeneScores create gnomAD gene scores database
Index edit an index file
MitoHet create mitochondrial Heteroplasmy database
MitomapSvDb create MITOMAP structural variants database
MitomapVarDb create MITOMAP small variants database
Omim create OMIM database
OneKGen create 1000 Genome small variants database
OneKGenSv create 1000 Genomes structural variants database
OneKGenSvVcfToBed convert 1000 Genomes structural variants VCF file into a BED-like file
PhyloP create PhyloP database
PrimateAi create PrimateAI database
RefMinor create Reference Minor database from 1000 Genome
RemapWithDbsnp remap a VCF file given source and destination rsID mappings
Revel create REVEL database
SpliceAi create SpliceAI database
TopMed create TOPMed database

You can get further detailed help for each sub-command by typing in the subcommand. For example:

dotnet Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll clinvar
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.18.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll clinvar [options]
Creates a supplementary database with ClinVar annotations

OPTIONS:
--ref, -r <VALUE> compressed reference sequence file
--rcv, -i <VALUE> ClinVar Full release XML file
--vcv, -c <VALUE> ClinVar Variation release XML file
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

More detailed instructions about each sub-command can be found in the documentation of the respective data sources.

Output File Formats

The format of the binary file that SAUtils produces depends on the type of annotation data represented in that file (e.g. small variants vs. structural variants vs. genes).

File ExtensionDescription
.nsaSmall variant annotations (e.g. SNV, insertions, deletions, etc.)
.gsaCompact variant annotations (e.g. SNV, insertions, deletions, etc.)
.idxIndex file
.nsiInterval annotations (e.g. SV, CNVs, intervals)
.ngaGene annotations
.npdConservation scores
.rmaReference Minor allele
.gfsGene fusions source
.gfjGene fusions JSON
.schemaJSON schema
- - - - \ No newline at end of file diff --git a/3.2.5/core-functionality/gene-fusions/index.html b/3.2.5/core-functionality/gene-fusions/index.html deleted file mode 100644 index 511f831b..00000000 --- a/3.2.5/core-functionality/gene-fusions/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -Gene Fusion Detection | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

Gene Fusion Detection

Overview

Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed.

Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana.

The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:

Publication

Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. Landscape of gene fusions in epithelial cancers: seq and ye shall find. Genome Med 7, 129 (2015)

Approach

Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions.

For each originating transcript, we report the following:

  • originating intron or exon number
  • for each partner transcript fused with the originating transcript, we report:
    • HGVS coding notation
    • partner intron or exon number

Variant Types

Specifically we can identify gene fusions from the following structural variant types:

  • deletions (<DEL>)
  • tandem_duplications (<DUP:TANDEM>)
  • inversions (<INV>)
  • translocation breakpoints (AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[)

Criteria

The following criteria must be met for Nirvana to identify a gene fusion:

  1. Both transcripts must possess a coding region
  2. After accounting for genomic rearrangements, both transcripts must have the same orientation
  3. Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)
  4. Both transcripts must belong to different genes
  5. Both transcripts cannot have a coding region that already overlaps without the variant (i.e. in cases where two genes naturally overlap, we don't want to call a gene fusion)
  6. The coding regions from the two genes must overlap

ETV6/RUNX1 Example

ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). Patients with this translocation are associated with a good prognosis and excellent response to treatment.

VCF

Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
chr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND
chr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND
chr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND
chr21 36420865 . C [chr12:12026270[C . PASS SVTYPE=BND

Interpreting translocation breakends

REFALTMeaning
st[p[piece extending to the right of p is joined after t
st]p]reverse comp piece extending left of p is joined after t
s]p]tpiece extending to the left of p is joined before t
s[p[treverse comp piece extending right of p is joined before t

Visualization

JSON Output

The annotation for the first variant in the VCF looks like this:

    {
"chromosome": "chr12",
"position": 12026270,
"refAllele": "C",
"altAlleles": [
"[chr21:36420865[C"
],
"filters": [
"PASS"
],
"cytogeneticBand": "12p13.2",
"clingen": [
{
"chromosome": "12",
"begin": 173786,
"end": 34835837,
"variantType": "copy_number_gain",
"id": "nsv995956",
"clinicalInterpretation": "pathogenic",
"phenotypes": [
"Decreased calvarial ossification",
"Delayed gross motor development",
"Feeding difficulties",
"Frontal bossing",
"Morphological abnormality of the central nervous system",
"Patchy alopecia"
],
"phenotypeIds": [
"HP:0002007",
"HP:0002011",
"HP:0002194",
"HP:0002232",
"HP:0005474",
"HP:0011968",
"MedGen:C0232466",
"MedGen:C1862862",
"MedGen:CN001816",
"MedGen:CN001820",
"MedGen:CN001989",
"MedGen:CN004852"
],
"observedGains": 1,
"validated": true
}
],
"variants": [
{
"vid": "12-12026270-C-[chr21:36420865[C",
"chromosome": "chr12",
"begin": 12026270,
"end": 12026270,
"isStructuralVariant": true,
"refAllele": "C",
"altAllele": "[chr21:36420865[C",
"variantType": "translocation_breakend",
"transcripts": [
{
"transcript": "ENST00000396373.4",
"source": "Ensembl",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "ENSG00000139083",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusion": {
"intron": 5,
"fusions": [
{
"hgvsc": "RUNX1{ENST00000437180.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 2
},
{
"hgvsc": "RUNX1{ENST00000300305.3}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 1
},
{
"hgvsc": "RUNX1{ENST00000482318.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 2
},
{
"hgvsc": "RUNX1{ENST00000486278.2}:c.?_156195_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 2
},
{
"hgvsc": "RUNX1{ENST00000455571.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 2
},
{
"hgvsc": "RUNX1{ENST00000475045.2}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 11
},
{
"hgvsc": "RUNX1{ENST00000416754.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",
"intron": 2
}
]
},
"isCanonical": true,
"proteinId": "ENSP00000379658.3"
},
{
"transcript": "NM_001987.4",
"source": "RefSeq",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "2120",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusion": {
"intron": 5,
"fusions": [
{
"hgvsc": "RUNX1{NM_001754.4}:c.1_58+274_ETV6{NM_001987.4}:c.1009+3367_1359",
"intron": 2
}
]
},
"isCanonical": true,
"proteinId": "NP_001978.1"
}
]
}
]
}

Consequences

When a gene fusion is identified, we add the following Sequence Ontology consequence:

              "consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],

Introns & Exons

In this section we describe all the pairwise gene fusions that obey the criteria outlined above. In the case of ENST00000396373.4, there are 7 other Ensembl transcripts that would produce a gene fusion. For NM_001987.4, there is only one transcript (NM_001754.4) that would produce a gene fusion.

In each case, Nirvana outputs which intron or exon contained the breakpoint in both of the transcripts that form the gene fusion.

HGVS coding notation

Finally, Nirvana also describes the gene fusion using HGVS c. notation:

                "fusions": [
{
"hgvsc": "RUNX1{NM_001754.4}:c.1_58+274_ETV6{NM_001987.4}:c.1009+3367_1359",
"intron": 2
}

This means that the gene fusion uses CDS positions 1-58 from NM_001754.4 (RUNX1) and CDS positions 1009-1359 from NM_001987.4 (ETV6). 1009+3367 indicates that the ETV6 breakpoint lies 3367 bp into intron 5, while 58+274 places the RUNX1 breakpoint 274 bp into intron 2.

- - - - \ No newline at end of file diff --git a/3.2.5/core-functionality/variant-ids/index.html b/3.2.5/core-functionality/variant-ids/index.html deleted file mode 100644 index 41701164..00000000 --- a/3.2.5/core-functionality/variant-ids/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Variant IDs | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

Variant IDs

Overview

Many downstream tools use a variant identifier to store annotation results.

Deprecated

This initial variant ID (VID) scheme was designed to be parsimonious and was not meant to be used to reconstitute the original VCF variant. In later versions of Nirvana, we migrated to the identifier scheme used at the Broad Institute (with some extensions to handle structural variants).

Conventions
  • all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)
  • for a reference variant (i.e. no alt allele), replace the period (.) with the reference base
  • padding bases are used, neither the reference nor alternate allele can be empty
  • some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base

SNV

VCF Example

chr1    69224   .   A   C   .   .   .

Format

chromosome:position:alternate allele

VID Example

  • 1:69224:C

Insertion

VCF Example

chr1    69567   .   A   AT  .   .   .

Format

chromosome:position after insertion:position before insertion:alternate allele OR MD5 hash

If more than 32 bases are being inserted, the VID scheme uses an MD5 checksum instead

VID Example

  • 1:69568:69567:T
  • 1:69568:69567:B9ECE18C950AFBFA6B0FDBFA4FF731D3

Deletion

VCF Example

chr1    136647  .   GG  G   .   .   .

Format

chromosome:start position:end position

VID Example

  • 1:136645:136645

Delins

VCF Example

chr1    965025  .   GCAGTGCATGGTGCTGTGAGATCAGCATGTGTG   GTGCAGTGCATGGTGCTGTGAGATCAGCA   .   .   .

Format

chromosome:start position:end position:inserted bases

If more than 32 bases are being inserted, the VID scheme uses an MD5 checksum instead

VID Example

  • 1:965026:965057:TGCAGTGCATGGTGCTGTGAGATCAGCA
  • 1:965026:965057:5DC27E17BE0B0F184325DC8654E34F1F

MNV

VCF Example

chr1    979210  .   TGG TTT .   .   .

Format

chromosome:start position:end position:alternate allele

If more than 32 bases are being inserted, the VID scheme uses an MD5 checksum instead

VID Example

  • 1:979211:979212:TT
  • 1:979211:979212:DF1F3EDB9115ACB0A1E04209B7A9937B

CNV

VCF Example

chr1    854895  .   N   <CN0>,<CN3> .   PASS    SVTYPE=CNV;END=861879;CNVLEN=6984;CIPOS=-291,291;CIEND=-291,291 GT:RC:BC:CN:MCC:MCCQ:QS:FT:DQ   1/2:165.40:12:3:3:16.80:16.71:PASS:.
chr1 814866 . N <CNV> 4 q10;CLT10kb SVTYPE=CNV;END=824517 RC:BC:CN 214:7:4

Format

chromosome:start position:end position:copy number or "CNV"

VID Example

  • 1:854896:861879:3
  • 1:814867:824517:CNV

Inversion (SV)

VCF Example

chr1    17051724    .   C   <INV>   3070    MaxDepth    END=234912187;SVTYPE=INV;SVLEN=217860463    GT:GQ:PR:SR 0/1:3070:77,69:84,76

Format

chromosome:start position:end position:Inverse

VID Example

  • 1:17051725:234912187:Inverse

Translocation (SV)

VCF Example

chr1    797265  .   G   G]chr8:245687]  55  PASS    SVTYPE=BND;CIPOS=0,31   GT:GQ:PR:SR 0/1:55:39,6:20,3

Format

chromosome 1:breakpoint 1:orientation 1:chromosome 2:breakpoint 2:orientation 2

VID Example

  • 1:797265:+:8:245687:-

Deletion (SV)

VCF Example

chr1    2053194 .   G   <DEL>   38  PASS    END=2055480;SVTYPE=DEL;SVLEN=-2286;IMPRECISE;CIPOS=-143,144;CIEND=-102,102  GT:GQ:PR    0/1:38:3,5

Format

chromosome:start position:end position

VID Example

  • 1:2053195:2055480

Insertion (SV)

VCF Example

chr1    1925144 .   G   <INS>   1439    PASS    END=1925144;SVTYPE=INS;CIPOS=0,14;CIEND=0,14    GT:GQ:PR:SR 1/1:72:2,7:0,33

Format

chromosome:start position:end position:INS

VID Example

  • 1:1925145:1925144:INS

Tandem Duplication (SV)

VCF Example

chr1    2454149 .   G   <DUP:TANDEM>    976 MaxDepth    END=2454244;SVTYPE=DUP;SVLEN=95;CIPOS=0,10;CIEND=0,10   GT:GQ:PR:SR 0/1:976:6,0:80,52

Format

chromosome:start position:end position:TDUP

VID Example

  • 1:2454150:2454244:TDUP
- - - - \ No newline at end of file diff --git a/3.2.5/data-sources/1000Genomes-snv-json/index.html b/3.2.5/data-sources/1000Genomes-snv-json/index.html deleted file mode 100644 index 7a4ef5f9..00000000 --- a/3.2.5/data-sources/1000Genomes-snv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-snv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

1000Genomes-snv-json

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.
- - - - \ No newline at end of file diff --git a/3.2.5/data-sources/1000Genomes-sv-json/index.html b/3.2.5/data-sources/1000Genomes-sv-json/index.html deleted file mode 100644 index 68f1bd49..00000000 --- a/3.2.5/data-sources/1000Genomes-sv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-sv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

1000Genomes-sv-json

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.
- - - - \ No newline at end of file diff --git a/3.2.5/data-sources/1000Genomes/index.html b/3.2.5/data-sources/1000Genomes/index.html deleted file mode 100644 index de43d934..00000000 --- a/3.2.5/data-sources/1000Genomes/index.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - -1000 Genomes | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

1000 Genomes

Overview

The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases.

Publication

Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. Nature 526, 75–81 (2015). https://doi.org/10.1038/nature15394

Populations

Small Variants

VCF File Parsing

The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. Our team converted the original data to VCF entries with allele counts and allele numbers like the following.

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633

The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored).

We parse the VCF file and extract the following fields from INFO:

  • AA
  • AC
  • AN
  • EAS_AN
  • AMR_AN
  • AFR_AN
  • EUR_AN
  • SAS_AN
  • EAS_AC
  • AMR_AC
  • AFR_AC
  • EUR_AC
  • SAS_AC

Conflict Resolution

We have observed conflicting allele frequency information in the source. Take the following example:

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;
1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;

That is, the variant 1-20505705-C-CTG has conflicting entries. To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX.

Chromosome | # of alleles | # of conflicting alleles | percentage
chrX | 834800 | 2733 | 0.33%
Total | 21413098 | 2743 | 0.013%

Currently, we remove the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep the allele frequencies of all other alleles in the VCF line.

Potential Alternate Solutions

  • Remove all alleles contained in VCF lines that have a conflicting allele. (Recommended by Holly Zheng-Bradley of the 1000 Genomes group, 7/29/2015)
  • Recalculate the allele frequency for the conflicting allele.
  • Pick the allele frequency that has the highest data support.

Download URL

GRCh37 -GRCh38

JSON Output

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.

Structural Variants

VCF File Parsing

The VCF files contain entries like the following:

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A <CN0>,<CN2>,<CN3>,<CN4> 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4

Please note that CNVs are allele-specific. For example, HG00096 is effectively copy number 4, which would be a net gain on chr22.

1000 Genomes contains 5 types of structural variants:

  • CNV
  • DEL
  • DUP
  • INS
  • INV

Since the 1000 Genomes data is provided in VCF format, we assume that the coordinates follow the VCF convention, i.e., there is a padding base for symbolic alleles. Every interval can therefore be interpreted as [BEGIN+1, END]. -Similarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below.

Insertion issues

  • END = BEGIN for 6/165
  • END = BEGIN+2 for 93/165
  • END = BEGIN+3 for 11/165
  • END = BEGIN+4 for 11/165
  • END – BEGIN ranges from 5 to 1156 for others.

Converting VCF svTypes to SO sequence alterations

The svType will be captured in our JSON file under the sequenceAlteration key. Here's the translation we'll use according to svType in 1000 Genomes.

svTypeAlternative Alleles contain <CN*>sequenceAlteration
ALUFALSEmobile_element_insertion
DUPTRUEcopy_number_gain
CNVTRUEcopy_number_gain (observed_gains >0 and observed_losses =0)
copy_number_loss (observed_gains = 0 and observed_losses > 0)
copy_number_variation (otherwise)
DELTRUEcopy_number_loss
LINE1FALSEmobile_element_insertion
SVAFALSEmobile_element_insertion
INVFALSEinversion
INSFALSEinsertion

Exceptions

We discard structural variants without END

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
21 9495848 esv3646347 A <INS:ME:LINE1> 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0

CNVs in chrY

  • No other types of structural variants exist in chrY
  • Since copy number is provided in genotype field, we directly parse the copy number from "CN" field.
  • For most CNVs in chrY, the reference copy number is 1, but the reference copy number for CNVs in segmental duplication sites is 2 (<CN2> in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00101 HG00103 HG00105 HG00107 HG00108
Y 2888555 CNV_Y_2888555_3014661 T <CN2> 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394
Y 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C <CN1>,<CN3> 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99

JSON Output

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.
- - - - \ No newline at end of file diff --git a/3.2.5/data-sources/clinvar-json/index.html b/3.2.5/data-sources/clinvar-json/index.html deleted file mode 100644 index 36205177..00000000 --- a/3.2.5/data-sources/clinvar-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clinvar-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

clinvar-json

"clinvar":[
{
"id":"RCV000030258.4",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity
- - - - \ No newline at end of file diff --git a/3.2.5/data-sources/clinvar/index.html b/3.2.5/data-sources/clinvar/index.html deleted file mode 100644 index c102c968..00000000 --- a/3.2.5/data-sources/clinvar/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -ClinVar | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

ClinVar

Overview

ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation.

Publication

Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, Nucleic Acids Research, 46, Issue D1, 4 January 2018, Pages D1062–D1067, https://doi.org/10.1093/nar/gkx1153

RCV File

Example

Here's a full RCV entry.

Parsing

In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output.

ID

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinVarAccession Acc="RCV000000001" Version="2">
</ClinVarSet>

The Acc and Version fields are merged to form the ID (RCV000000001.2)

LastUpdatedDate

<ClinVarSet>
<ReferenceClinVarAssertion DateCreated="2012-08-13" DateLastUpdated="2016-02-17" ID="57604" >
</ClinVarSet>

Significance

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

ReviewStatus

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

Phenotypes

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="62">
<Trait Type="Disease">
<Name>
<ElementValue Type="Preferred">Joubert syndrome 9</ElementValue>
</Name>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

We only use the field with Type="Preferred". Multiple phenotypes may be reported.

Location and Variant Id

<ReferenceClinVarAssertion>
<GenotypeSet Type="CompoundHeterozygote" ID="424709">
<MeasureSet Type="Variant" ID="81">
<Measure Type="single nucleotide variant" ID="15120">
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38"
AssemblyStatus="current" Chr="10" Accession="NC_000010.11" start="89222510"
stop="89222510" display_start="89222510" display_stop="89222510" variantLength="1"
positionVCF="89222510" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25"
AssemblyStatus="previous" Chr="10" Accession="NC_000010.10" start="90982267"
stop="90982267" display_start="90982267" display_stop="90982267" variantLength="1"
positionVCF="90982267" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
</Measure>
</MeasureSet>
</GenotypeSet>
</ReferenceClinVarAssertion>
  • The variant position is extracted from the fields for their respective assemblies.
  • Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant.
  • For older records, since the "start" and "stop" fields are not always available, we use the "display_start" and "display_stop" fields.
  • If a required allele is not available, we extract it from the reference sequence.
  • Only variants having a dbSNP id are extracted.
  • Note that a ClinVar accession may have multiple variants associated with it (possibly in different locations).
  • VariantId is extracted from the MeasureSet attributes.

MedGen, OMIM, Orphanet IDs

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="175">
<Trait ID="3036" Type="Disease">
<XRef ID="C0086651" DB="MedGen"/>
<XRef ID="309297" DB="Orphanet"/>
<XRef ID="582" DB="Orphanet"/>
<XRef Type="MIM" ID="253000" DB="OMIM"/>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

AlleleOrigins

<ClinVarAssertion>
<Origin>germline</Origin>
</ClinVarAssertion>

We extract all Allele Origins, but only from Submission (SCV) entries.

PubMedIds

<ClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<Citation Type="general">
<ID Source="PubMed">12114475</ID>
</Citation>
</ClinicalSignificance>
<AttributeSet>
<Attribute Type="AssertionMethod">LMM Criteria</Attribute>
<Citation>
<ID Source="PubMed">24033266</ID>
</Citation>
</AttributeSet>
<ObservedIn>
<ObservedData ID="9727445">
<Citation Type="general">
<ID Source="PubMed">9113933</ID>
</Citation>
</ObservedData>
</ObservedIn>
<Citation Type="general">
<ID Source="PubMed">23757202</ID>
</Citation>
</ClinVarAssertion>

We extract all PubMed IDs, but only from Submission (SCV) entries.

Parsing Significance

Extracting significance(s) may involve parsing multiple fields. Take the following snippets into consideration.

<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2016-10-13">
<ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus>
<Description>Pathogenic/Likely pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2012-06-07">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Conflicting interpretations of pathogenicity</Description>
<Explanation DataSource="ClinVar" Type="public">Pathogenic(1);Uncertain significance(1)</Explanation>
</ClinicalSignificance>

Given the evidence, we converted the significance field into an array of strings which may be parsed out of the Description or Explanation fields.

Varying Delimiters

The delimiters in each field may vary. Currently, the delimiters for Description are , and /. The delimiters for Explanation are ; and /.

Known Issues

Known Issues
  • The XML file contains ~1k more entries (out of 162K) than the VCF file
  • The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF
  • The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H", -etc.) as their alternate allele

Download URL

ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz

JSON Output

"clinvar":[
{
"id":"RCV000030258.4",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity
- - - - \ No newline at end of file diff --git a/3.2.5/data-sources/dbsnp-json/index.html b/3.2.5/data-sources/dbsnp-json/index.html deleted file mode 100644 index 76354528..00000000 --- a/3.2.5/data-sources/dbsnp-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbsnp-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

dbsnp-json

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.2.5/data-sources/dbsnp/index.html b/3.2.5/data-sources/dbsnp/index.html deleted file mode 100644 index 1531013e..00000000 --- a/3.2.5/data-sources/dbsnp/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbSNP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

dbSNP

Overview

dbSNP contains human single nucleotide variations, microsatellites, and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations.

Publication

Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP—Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. Genome Res., 9, 677–679.

VCF File

Example

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 10177 rs367896724 A AC . . RS=367896724;RSPOS=10177;dbSNPBuildID=138; \
SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \
VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \
TOPMED=0.76728147298674821,0.23271852701325178

Parsing

From the VCF file, we're mainly interested in the following:

  • rsID from the ID field
  • CAF from the INFO field

Global allele extraction

The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values).

Tie Breaking: Global Major Allele

If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele.

Tie Breaking: Global Minor Allele

If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily.

Equal Allele Frequency Example (2 alleles)

chr1    100 A   C   CAF=0.5,0.5

We will select A to be the global major allele and C to be the global minor allele.

Equal Allele Frequency Example (3 alleles)

chr1    100 A   C,T CAF=0.33,0.33,0.33

We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele.

Equal Allele Frequency in Alternate Alleles

chr1    100 A   C,T CAF=0.2,0.4,0.4

C and T will be arbitrarily assigned to be the global major and global minor alleles (one each).

Equal Allele Frequency Between Reference & Alternate Allele

chr1    100 A   C,T CAF=0.2,0.2,0.6

We will select T to be the global major allele and C to be the global minor allele.

Known Issues

Known Issues

If there are multiple entries with different CAF values for the same allele, we use the first CAF value.

Download URL

https://ftp.ncbi.nih.gov/snp/organisms/

JSON Output

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.2.5/data-sources/gnomad-exomes-small-variants-json/index.html b/3.2.5/data-sources/gnomad-exomes-small-variants-json/index.html deleted file mode 100644 index f8c4f5b5..00000000 --- a/3.2.5/data-sources/gnomad-exomes-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-exomes-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

gnomad-exomes-small-variants-json

"gnomadExome":{ 
"coverage":20,
"allAf":0.190317,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
- - - - \ No newline at end of file diff --git a/3.2.5/data-sources/gnomad-genomes-small-variants-json/index.html b/3.2.5/data-sources/gnomad-genomes-small-variants-json/index.html deleted file mode 100644 index de69ef4d..00000000 --- a/3.2.5/data-sources/gnomad-genomes-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-genomes-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

gnomad-genomes-small-variants-json

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
- - - - \ No newline at end of file diff --git a/3.2.5/data-sources/gnomad/index.html b/3.2.5/data-sources/gnomad/index.html deleted file mode 100644 index a3226510..00000000 --- a/3.2.5/data-sources/gnomad/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomAD | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

gnomAD

Overview

The Genome Aggregation Database (gnomAD) is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community.

Small Variants

VCF extraction

We currently extract the following info fields from gnomAD genome and exome VCF files:

##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate allele count for samples">
##INFO=<ID=AN,Number=A,Type=Integer,Description="Total number of alleles in samples">
##INFO=<ID=nhomalt,Number=A,Type=Integer,Description="Count of homozygous individuals in samples">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Depth of informative coverage for each sample; reads with MQ=255 or with bad mates are filtered">
##INFO=<ID=lcr,Number=0,Type=Flag,Description="Variant falls within a low complexity region">
##INFO=<ID=AC_afr,Number=A,Type=Integer,Description="Alternate allele count for samples of African-American ancestry">
##INFO=<ID=AN_afr,Number=A,Type=Integer,Description="Total number of alleles in samples of African-American ancestry">
##INFO=<ID=AF_afr,Number=A,Type=Float,Description="Alternate allele frequency in samples of African-American ancestry">
##INFO=<ID=nhomalt_afr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of African-American ancestry">
##INFO=<ID=AC_amr,Number=A,Type=Integer,Description="Alternate allele count for samples of Latino ancestry">
##INFO=<ID=AN_amr,Number=A,Type=Integer,Description="Total number of alleles in samples of Latino ancestry">
##INFO=<ID=nhomalt_amr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Latino ancestry">
##INFO=<ID=AC_eas,Number=A,Type=Integer,Description="Alternate allele count for samples of East Asian ancestry">
##INFO=<ID=AN_eas,Number=A,Type=Integer,Description="Total number of alleles in samples of East Asian ancestry">
##INFO=<ID=nhomalt_eas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of East Asian ancestry">
##INFO=<ID=AC_female,Number=A,Type=Integer,Description="Alternate allele count for female samples">
##INFO=<ID=AN_female,Number=A,Type=Integer,Description="Total number of alleles in female samples">
##INFO=<ID=nhomalt_female,Number=A,Type=Integer,Description="Count of homozygous individuals in female samples">
##INFO=<ID=AC_nfe,Number=A,Type=Integer,Description="Alternate allele count for samples of non-Finnish European ancestry">
##INFO=<ID=AN_nfe,Number=A,Type=Integer,Description="Total number of alleles in samples of non-Finnish European ancestry">
##INFO=<ID=nhomalt_nfe,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of non-Finnish European ancestry">
##INFO=<ID=AC_fin,Number=A,Type=Integer,Description="Alternate allele count for samples of Finnish ancestry">
##INFO=<ID=AN_fin,Number=A,Type=Integer,Description="Total number of alleles in samples of Finnish ancestry">
##INFO=<ID=nhomalt_fin,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Finnish ancestry">
##INFO=<ID=AC_asj,Number=A,Type=Integer,Description="Alternate allele count for samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AN_asj,Number=A,Type=Integer,Description="Total number of alleles in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=nhomalt_asj,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AC_oth,Number=A,Type=Integer,Description="Alternate allele count for samples of uncertain ancestry">
##INFO=<ID=AN_oth,Number=A,Type=Integer,Description="Total number of alleles in samples of uncertain ancestry">
##INFO=<ID=nhomalt_oth,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of uncertain ancestry">
##INFO=<ID=AC_male,Number=A,Type=Integer,Description="Alternate allele count for male samples">
##INFO=<ID=AN_male,Number=A,Type=Integer,Description="Total number of alleles in male samples">
##INFO=<ID=nhomalt_male,Number=A,Type=Integer,Description="Count of homozygous individuals in male samples">
##INFO=<ID=controls_AC,Number=A,Type=Integer,Description="Alternate allele count for samples in the controls subset">
##INFO=<ID=controls_AN,Number=A,Type=Integer,Description="Total number of alleles in samples in the controls subset">

We also extract the following extra fields from gnomAD exome VCF file:

##INFO=<ID=AC_sas,Number=A,Type=Integer,Description="Alternate allele count for samples of South Asian ancestry">
##INFO=<ID=AN_sas,Number=A,Type=Integer,Description="Total number of alleles in samples of South Asian ancestry">
##INFO=<ID=nhomalt_sas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of South Asian ancestry">

Computation

Using these, we compute the following:

  • Coverage
  • Allele count, Homozygous count, allele number and allele frequencies for:
    • Global population
    • African/African Americans
    • Admixed Americans
    • Ashkenazi Jews
    • East Asians
    • Finnish
    • Non-Finnish Europeans
    • South Asian
    • Others (population not assigned)
    • Male
    • Female
    • Controls
Note
  • Coverage = DP / AN. Frequencies are computed using AC/AN for each population.
  • Please note that there is currently no genome sequencing data for the South Asian (SAS) population available in gnomAD.
  • Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.

VCF download instructions

https://gnomad.broadinstitute.org/downloads

JSON output

Genome and exome allele frequencies are provided in separate JSON sections.

Genomes

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)

Exomes

"gnomadExome":{ 
"coverage":20,
"allAf":0.190317,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
- - - - \ No newline at end of file diff --git a/3.2.5/file-formats/nirvana-json-file-format/index.html b/3.2.5/file-formats/nirvana-json-file-format/index.html deleted file mode 100644 index 6423b349..00000000 --- a/3.2.5/file-formats/nirvana-json-file-format/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Nirvana JSON File Format | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

Nirvana JSON File Format

Overview

Conventions

In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. As such, we have several conventions that are useful to know about:

  • With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display "isStructuralVariant":false a few million times when annotating a small variant VCF.
  • When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. Nirvana treats periods like empty or null strings and therefore will not output those entries.

JSON Layout

info

In general, each position corresponds to a row in the original VCF file.

For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section.

{ 
"header":{
"annotator":"Nirvana 3.2.5",
"creationTime":"2022-12-05 16:43:41",
"genomeAssembly":"GRCh37",
"schemaVersion":6,
"dataVersion":"91.26.50",
"dataSources":[
{
"name":"VEP",
"version":"91",
"description":"RefSeq",
"releaseDate":"2018-03-05"
},
{
"name":"ClinVar",
"version":"20190204",
"description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",
"releaseDate":"2019-02-04"
}
],
"samples":[
"NA12878",
"NA12891",
"NA12892"
]
},
FieldTypeNotes
annotatorstringthe name of the annotator and the current version
creationTimestringyyyy-MM-dd hh:mm:ss
genomeAssemblystringsee possible values below
schemaVersionintegerincremented whenever the core structure of the JSON file introduces breaking changes
dataVersionstring
dataSourcesobject arraysee Data Source entry below
samplesstring arraythe order of these sample names will be used throughout the JSON file when enumerating samples

Data Source

FieldTypeNotes
namestring
versionstring
descriptionstringoptional description of the data source
releaseDatestringyyyy-MM-dd

Genome Assemblies

  • GRCh37
  • GRCh38
  • hg19

Positions

"positions":[ 
{
"chromosome":"chr2",
"position":48010488,
"repeatUnit":"GGCCCC",
"refRepeatCount":3,
"svEnd":48020488,
"refAllele":"G",
"altAlleles":[
"A",
"GT"
],
"quality":461,
"filters":[
"PASS"
],
"ciPos":[
-170,
170
],
"ciEnd":[
-175,
175
],
"svLength":1000,
"strandBias":1.23,
"jointSomaticNormalQuality":29,
"cytogeneticBand":"2p16.3",
FieldTypeVariant TypeNotes
chromosomestringallexactly as displayed in the vcf
positionintegerallexactly as displayed in the vcf (1-based notation). Range: 1 - 250 million
repeatUnitstringSTRprovided by ExpansionHunter
refRepeatCountintegerSTRprovided by ExpansionHunter
svEndintegerSV
refAllelestringallexactly as displayed in the vcf
altAllelesstring arrayallexactly as displayed in the vcf
qualityfloatallexactly as displayed in the vcf (Normally an integer, but some variant callers use floating point. Has been observed as high as 500k)
filtersstring arrayallexactly as displayed in the vcf
ciPosinteger arraySV
ciEndinteger arraySV
svLengthintegerSV
strandBiasfloatsmall variantprovided by GATK (from SB)
jointSomaticNormalQualityintegerSVprovided by the Manta variant caller (SOMATICSCORE)
cytogeneticBandstringalle.g. 17p13.1

1000 Genomes (SV)

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.

Samples

"samples":[
{
"genotype":"0/1",
"variantFrequencies":[
0.333,
0.5
],
"totalDepth":57,
"genotypeQuality":12,
"copyNumber":3,
"repeatUnitCounts":[
10,
20
],
"alleleDepths":[
10,
20,
30
],
"failedFilter":true,
"splitReadCounts":[
10,
20
],
"pairedEndReadCounts":[
10,
20
],
"diseaseAffectedStatuses":[
"-"
],
"artifactAdjustedQualityScore":89.3,
"likelihoodRatioQualityScore":78.2
}
]
FieldTypeNotes
genotypestring
repeatNumbersstringExpansionHunter-specific
repeatNumberSpansstringExpansionHunter-specific
variantFrequenciesfloat arrayrange: 0 - 1.0. One value per alternate allele
totalDepthintegernon-negative integer values
genotypeQualityintegernon-negative integer values. Typically maxes out at 99
copyNumberintegernon-negative integer values
alleleDepthsinteger arraynon-negative integer values
failedFilterbool
splitReadCountsinteger arrayManta-specific
pairedEndReadCountsinteger arrayManta-specific
lossOfHeterozygositybool
deNovoQualityfloat
mpileupAlleleDepthsint arraySMN1-specific
silentCarrierHaplotypestringSMN1-specific
paralogousEntrezGeneIdsint arraySMN1-specific
paralogousGeneCopyNumbersint arraySMN1-specific
diseaseClassificationSourcesstring arraySMN1-specific
diseaseIdsstring arraySMN1-specific
diseaseAffectedStatusesstring arraySMN1-specific
proteinAlteringVariantPositionsint arraySMN1-specific
isCompoundHetCompatibleboolSMN1-specific
artifactAdjustedQualityScorefloatPEPE-specific. Range: 0 - 100.0
likelihoodRatioQualityScorefloatPEPE-specific. Range: 0 - 100.0
Empty Samples

If a sample does not contain any entries, we will create a sample object that contains the isEmpty key. This ensures that sample ordering is preserved while indicating that a sample is intentionally empty.

"samples":[ 
{
"isEmpty":true
}
],

Variants

"variants":[ 
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"isReferenceMinorAllele":true,
"isStructuralVariant":true,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"isDecomposedVariant":true,
"isRecomposedVariant":true,
"hgvsg":"NC_000002.11:g.48010488G>A",
"phylopScore":0.459
FieldTypeNotes
vidstringsee Variant Identifiers
chromosomestring
beginint1-based non-negative integer values. Range: 1 - 250 million
endint1-based non-negative integer values. Range: 1 - 250 million
isReferenceMinorAllelebooltrue when this is a reference minor allele
isStructuralVariantbooltrue when the variant is a structural variant
refAllelestringparsimonious representation of the reference allele
altAllelestringparsimonious representation of the alternate allele.
variantTypestringuses Sequence Ontology sequence alterations
isDecomposedVariantbooltrue when the decomposed variant has been used to create another recomposed variant
isRecomposedVariantbooltrue when the variant is recomposed from two or more decomposed variants
hgvsgstringHGVS g. notation
phylopScorefloatphyloP conservation score. Range: -14.08 to 6.424
Reference Minor Alleles

Nirvana supports annotating reference minor alleles. In such a case, refAllele will be replaced by the global major allele and altAllele will be replaced with the original reference allele.

Flagging Decomposed & Recomposed Variants

When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with "isDecomposedVariant":true.

Similarly, the recomposed variant will be shown as a new VCF position. This recomposed variant will be flagged with "isRecomposedVariant":true.

Transcripts

"transcripts":[
{
"transcript":"ENST00000445503.1",
"source":"Ensembl",
"bioType":"nonsense_mediated_decay",
"codons":"gGg/gAg",
"aminoAcids":"G/E",
"cdnaPos":"268",
"cdsPos":"116",
"exons":"1/9",
"introns":"1/8",
"proteinPos":"39",
"geneId":"ENSG00000116062",
"hgnc":"MSH6",
"consequence":[
"missense_variant",
"NMD_transcript_variant"
],
"hgvsc":"ENST00000445503.1:c.116G>A",
"hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",
"geneFusion":{
"exon":6,
"intron":5,
"fusions":[
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",
"exon":3,
"intron":2
},
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",
"exon":2,
"intron":1
}
]
},
"isCanonical":true,
"polyPhenScore":0.95,
"polyPhenPrediction":"probably damaging",
"proteinId":"ENSP00000405294.1",
"siftScore":0.61,
"siftPrediction":"tolerated",
"completeOverlap":true
}
]
FieldTypeNotes
transcriptstringtranscript ID. e.g. ENST00000445503.1
sourcestringRefSeq / Ensembl
bioTypestringdescriptions of the biotypes from Ensembl
codonsstring
aminoAcidsstring
cdnaPosstring
cdsPosstring
exonsstringexons affected by the variant
intronsstringintrons affected by the variant
proteinPosstring
geneIdstringgene ID. e.g. ENSG00000116062
hgncstringgene symbol. e.g. MSH6
consequencestring arraySequence Ontology Consequences
hgvscstringHGVS coding nomenclature
hgvspstringHGVS protein nomenclature
geneFusionobjectsee Gene Fusions entry below
isCanonicalbooltrue when this is a canonical transcript
polyPhenScorefloatrange: 0 - 1.0
polyPhenPredictionstringsee possible values below
proteinIdstringprotein ID. E.g. ENSP00000405294.1
siftScorefloatrange: 0 - 1.0
siftPredictionstringsee possible values below
completeOverlapbooltrue when this transcript is completely overlapped by the variant

PolyPhen

  • probably damaging
  • possibly damaging
  • benign
  • unknown

SIFT

  • tolerated
  • deleterious
  • tolerated - low confidence
  • deleterious - low confidence

Gene Fusions

FieldTypeNotes
exonintactual exon where the breakpoint was located
intronintactual intron where the breakpoint was located
fusionsobject arraysee Fusion entry below

Fusion

FieldTypeNotes
exonintactual exon where the other breakpoint was located
intronintactual intron where the other breakpoint was located
hgvscstringHGVS coding nomenclature describing the two genes and the transcripts that are fused along with the coding-sequence positions of the breakpoints

Regulatory Regions

"regulatoryRegions":[ 
{
"id":"ENSR00001542175",
"type":"promoter",
"consequence":[
"regulatory_region_variant"
]
}
]
FieldTypeNotes
idstring
typestringsee possible values below
consequencestring arraysee possible values below

Regulatory Types

  • CTCF_binding_site
  • enhancer
  • open_chromatin_region
  • promoter
  • promoter_flanking_region
  • TF_binding_site

Regulatory Consequences

  • regulatory_region_variant
  • regulatory_region_ablation
  • regulatory_region_amplification
  • regulatory_region_truncation

ClinVar

"clinvar":[
{
"id":"RCV000030258.4",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]
FieldTypeNotes
idstringClinVar ID
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity

1000 Genomes

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.

gnomAD (genomes)

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)

gnomAD (exomes)

"gnomadExome":{ 
"coverage":20,
"allAf":0.190317,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)

dbSNP

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.2.5/index.html b/3.2.5/index.html deleted file mode 100644 index 7f288ac5..00000000 --- a/3.2.5/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Introduction | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs, including CNVs). It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation.

Nirvana takes VCFs as input and outputs a structured JSON representation of all annotation and sample information (as extracted from the VCF). Nirvana handles multiple alternate alleles and multiple samples with ease.

The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily.

Fun Fact

Nirvana is a backronym for NImble and Robust VAriant aNnotAtor

What does Nirvana annotate?

We use Sequence Ontology consequences to describe how each variant impacts a given transcript:

In addition, we also use external data sources to provide additional context for each variant:

Licensing

Code

Nirvana source code is provided under the GPLv3 license. Nirvana includes several third party packages provided under other open source licenses, please see Dependencies for additional details.

Data

The data used by Nirvana is publicly available; however, some data sources have special restrictions on use by non-academic entities.

Nirvana Team

Active Team

The Nirvana team works on the core functionality and AWS annotation services, in addition to keeping the annotation data sources up-to-date.

Current members of the Nirvana team are listed in alphabetical order below.

Haochen Li

Active developer. Detail-oriented quick thinker that keeps cool even in the most stressful situations.

Michael Strömberg

Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it.

Rajat Shuvro Roy

Lead developer. Loves to speed up things and make services available to all interested users.

Honorary Alumni

Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things.

Julien Lajugie

Julien is a legend around these parts. When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place.

Shuli Kang

Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies.

Yu Jiang

Biostatistics genius from Duke University before joining our team at Illumina. Now working as a Research Engineer at Facebook AI Research.
- - - - \ No newline at end of file diff --git a/3.2.5/introduction/dependencies/index.html b/3.2.5/introduction/dependencies/index.html deleted file mode 100644 index 41c1c7be..00000000 --- a/3.2.5/introduction/dependencies/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Dependencies | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

Dependencies

All of the following dependencies have been included in this repository.

NameLicenseUsage
AWSSDKApacheAWS Lambda, S3, SNS support
Json.NETMITJASIX utility
libdeflateMITBlockCompression library
MoqBSDMocking framework for unit tests
NDesk.OptionsMIT/X11CommandLine library
xUnitApacheUnit testing framework
zlib-ngzlibBlockCompression library
zstdBSDBlockCompression library
- - - - \ No newline at end of file diff --git a/3.2.5/introduction/getting-started/index.html b/3.2.5/introduction/getting-started/index.html deleted file mode 100644 index 9b107c30..00000000 --- a/3.2.5/introduction/getting-started/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Getting Started | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.2.5

Getting Started

Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.

tip

Nirvana currently uses .NET Core 2.1. Please make sure that you have the most current runtime from the .NET Core downloads page.

Quick Start

If you want to get started right away, we've created a script that downloads Nirvana, compiles it, and starts annotating a test file:

curl -O https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh
sh ./TestNirvana.sh

We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X.

Getting Nirvana

Compile from Source

The following will grab the latest version of Nirvana from GitHub and compile it using the .NET Core compiler:

git clone https://github.com/Illumina/Nirvana.git
cd Nirvana
dotnet build -c Release

GitHub Release Notes

Alternatively, you can grab the latest binaries from our GitHub Releases page:

mkdir -p Nirvana/Data
cd Nirvana
unzip Nirvana-3.2.5-dotnet-2.1.0.zip

Downloading the data files

Downloader not available

Nirvana 3.2.5 does not include a downloader tool, but these files can be copied over from the TSO 500 or TSO Comprehensive data directory if you have those. Otherwise, an unsupported route is to use the downloader from Nirvana 3.13 to get the reference, cache, and supplementary annotation files.

Download a test VCF file

Here's a toy VCF file you can play around with:

curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz

Running Nirvana

Once you have downloaded the data sets, use the following command to annotate your VCF:

dotnet bin/Release/netcoreapp2.1/Nirvana.dll \
-c Data/Cache/GRCh37/Both \
--sd Data/SupplementaryAnnotation/GRCh37 \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i HiSeq.10000.vcf.gz \
-o HiSeq.10000
  • the -c argument specifies the cache prefix
  • the --sd argument specifies the supplementary annotation directory
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input VCF path
  • the -o argument specifies the output filename prefix

When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:

---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:01.8
SA Position Scan 00:00:00.7 12902

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
chr1 00:00:02.3 00:00:04.5 2176

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:02.6 16.5 %
Preload 00:00:02.3 15.2 %
Annotation 00:00:04.5 29.0 %

Time: 00:00:14.7

The output will be a JSON file called HiSeq.10000.json.gz. Here's the full JSON file.

- - - - \ No newline at end of file diff --git a/3.21/core-functionality/canonical-transcripts/index.html b/3.21/core-functionality/canonical-transcripts/index.html deleted file mode 100644 index f638f90b..00000000 --- a/3.21/core-functionality/canonical-transcripts/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Canonical Transcripts | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Canonical Transcripts

Overview

One of the more polarizing topics within annotation is the notion of canonical transcripts. Because of alternative splicing, we often have several transcripts for each gene. In the human genome, there are an average of 3.4 transcripts per gene (Tung, 2020). As scientists, we seem to have a need for identifying a representative example of a gene - even if there's no biological basis for the motivation.

Golden Helix Blog

A few years ago, the guys over at Golden Helix wrote an excellent post about the pitfalls and issues surrounding the identification of canonical transcripts: What’s in a Name: The Intricacies of Identifying Variants.

In Nirvana, we wanted to identify an algorithm for determining the canonical transcript and apply it consistently to all of our transcript data sources.

Known Algorithms

UCSC

UCSC publishes a list of canonical transcripts in its knownCanonical table which is available via the TableBrowser. Of the RefSeq data sources, it was the only one we could find that provided canonical transcripts:

The canonical transcript is defined as either the longest CDS, if the gene has translated transcripts, or the longest cDNA.

If you were to implement this and compare it with the knownCanonical table, you would see a lot of exceptions to the rule.

Ensembl

The Ensembl glossary states:

The canonical transcript is used in the gene tree analysis in Ensembl and does not necessarily reflect the most biologically relevant transcript of a gene. For human, the canonical transcript for a gene is set according to the following hierarchy:

  1. Longest CCDS translation with no stop codons.
  2. If no (1), choose the longest Ensembl/Havana merged translation with no stop codons.
  3. If no (2), choose the longest translation with no stop codons.
  4. If no translation, choose the longest non-protein-coding transcript.

ACMG

From the ACMG Guidelines for the Interpretation of Sequence Variants:

A reference transcript for each gene should be used and provided in the report when describing coding variants. The transcript should represent either the longest known transcript and/or the most clinically relevant transcript.

ClinVar

From the ClinVar paper:

When there are multiple transcripts for a gene, ClinVar selects one HGVS expression to construct a preferred name. By default, this selection is based on the first reference standard transcript identified by the RefSeqGene/LRG (Locus Reference Genomic) collaboration.

Unified Approach

Our approach is almost identical to the one Golden Helix discussed in their article:

  1. If we're looking at RefSeq, only consider NM & NR transcripts as candidates for canonical transcripts.
  2. Sort the transcripts in the following order:
    1. Locus Reference Genomic (LRG) entries occur before non-LRG entries
    2. Descending CDS length
    3. Descending transcript length
    4. Ascending accession number
  3. Grab the first entry
- - - - \ No newline at end of file diff --git a/3.21/core-functionality/gene-fusions/index.html b/3.21/core-functionality/gene-fusions/index.html deleted file mode 100644 index 9af8d83e..00000000 --- a/3.21/core-functionality/gene-fusions/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -Gene Fusion Detection | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Gene Fusion Detection

Overview

Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed.

Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana.

The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:

Publication

Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. Landscape of gene fusions in epithelial cancers: seq and ye shall find. Genome Med 7, 129 (2015)

Approach

Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. Let's consider two transcripts, NM_014206.3 (TMEM258) and NM_013402.4 (FADS1). Both of these genes are on the reverse strand in the genome. The vertical bar indicates the breakpoint where these transcripts are fused:

TMEM258 &amp; FADS1 transcripts

The above explains where the transcripts are fused together, but it doesn't explain in which orientation. By using the directionality encoded in the translocation breakend, we can rearrange these two transcripts in four ways:

TMEM258 &amp; FADS1 gene fusions

Only two of the combinations yield a fusion containing both the transcription start site (TSS) and the stop codon. In one case, we can even detect an in-frame gene fusion. -If only unidirectional gene fusions are desired, only these two fusions can be detected. If enable-bidirectional-fusions is enabled, all four cases can be identified.

Interpreting translocation breakends

At first glance, translocation breakends are a bit daunting. However, once you understand how they work, they're actually quite simple. For more information, we recommend reading section 5.4 in the VCF 4.2 specification.

REFALTMeaning
st[p[piece extending to the right of p is joined after t
st]p]reverse comp piece extending left of p is joined after t
s]p]tpiece extending to the left of p is joined before t
s[p[treverse comp piece extending right of p is joined before t

Variant Types

Specifically we can identify gene fusions from the following structural variant types:

  • deletions (<DEL>)
  • tandem_duplications (<DUP:TANDEM>)
  • inversions (<INV>)
  • translocation breakpoints (AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[)

Criteria

The following criteria must be met for Nirvana to identify a gene fusion:

  1. After accounting for gene orientation and genomic rearrangements, both transcripts must have the same orientation if enable-bidirectional-fusions is not enabled. They can have the same or different orientations if enable-bidirectional-fusions is set.
  2. Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)
  3. Both transcripts must belong to different genes
  4. Both transcripts cannot have a coding region that already overlaps without the variant (i.e. in cases where two genes naturally overlap, we don't want to call a gene fusion)

ETV6/RUNX1 Example

ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). Patients with this translocation are associated with a good prognosis and excellent response to treatment.

VCF

Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
chr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND
chr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND
chr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND
chr21 36420865 . C [chr12:12026270[C . PASS SVTYPE=BND

When you put these calls together, the resulting genomic rearrangement looks something like this:

JSON Output

The annotation for the first variant in the VCF looks like this:

{
"chromosome": "chr12",
"position": 12026270,
"refAllele": "C",
"altAlleles": [
"[chr21:36420865[C"
],
"filters": [
"PASS"
],
"cytogeneticBand": "12p13.2",
"clingen": [
{
"chromosome": "12",
"begin": 173786,
"end": 34835837,
"variantType": "copy_number_gain",
"id": "nsv995956",
"clinicalInterpretation": "pathogenic",
"phenotypes": [
"Decreased calvarial ossification",
"Delayed gross motor development",
"Feeding difficulties",
"Frontal bossing",
"Morphological abnormality of the central nervous system",
"Patchy alopecia"
],
"phenotypeIds": [
"HP:0002007",
"HP:0002011",
"HP:0002194",
"HP:0002232",
"HP:0005474",
"HP:0011968",
"MedGen:C0232466",
"MedGen:C1862862",
"MedGen:CN001816",
"MedGen:CN001820",
"MedGen:CN001989",
"MedGen:CN004852"
],
"observedGains": 1,
"validated": true
}
],
"variants": [
{
"vid": "12-12026270-C-[chr21:36420865[C",
"chromosome": "chr12",
"begin": 12026270,
"end": 12026270,
"isStructuralVariant": true,
"refAllele": "C",
"altAllele": "[chr21:36420865[C",
"variantType": "translocation_breakend",
"cosmicGeneFusions": [
{
"id": "COSF2245",
"numSamples": 249,
"geneSymbols": [
"ETV6",
"RUNX1"
],
"hgvsr": "ENST00000396373.4(ETV6):r.1_1283::ENST00000300305.3(RUNX1):r.504_6222",
"histologies": [
{
"name": "acute lymphoblastic B cell leukaemia",
"numSamples": 169
},
{
"name": "acute lymphoblastic leukaemia",
"numSamples": 80
}
],
"sites": [
{
"name": "haematopoietic and lymphoid tissue",
"numSamples": 249
}
],
"pubMedIds": [
7761424,
7780150,
8609706,
8751464,
8982044,
9067587,
9207408,
9226156,
9628428,
10463610,
10774753,
11091202,
12621238,
12661004,
12750722,
15104290,
15642392,
24557455,
26925663
]
}
],
"fusionCatcher": [
{
"genes": {
"first": {
"hgnc": "ETV6",
"isOncogene": true
},
"second": {
"hgnc": "RUNX1",
"isOncogene": true
}
},
"somaticSources": [
"DepMap CCLE",
"Cancer Genome Project",
"ChimerKB 4.0",
"ChimerPub 4.0",
"ChimerSeq 4.0",
"Known",
"Mitelman DB",
"OncoKB",
"TICdb"
]
}
],
"transcripts": [
{
"transcript": "ENST00000396373.4",
"source": "Ensembl",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "ENSG00000139083",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusions": [
{
"transcript": "ENST00000437180.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000437180.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",
"directionality":"uniDirectional"
},
{
"transcript": "ENST00000300305.3",
"bioType": "protein_coding",
"intron": 1,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000300305.3(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",
"directionality":"uniDirectional"
},
{
"transcript": "ENST00000482318.1",
"bioType": "nonsense_mediated_decay",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000482318.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",
"directionality":"uniDirectional"
},
{
"transcript": "ENST00000486278.2",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000486278.2(RUNX1):r.?_-15+274::ENST00000396373.4(ETV6):r.1009+3367_?",
"directionality":"uniDirectional"
},
{
"transcript": "ENST00000455571.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000455571.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",
"directionality":"uniDirectional"
},
{
"transcript": "ENST00000475045.2",
"bioType": "protein_coding",
"intron": 11,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000475045.2(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",
"directionality":"uniDirectional"
},
{
"transcript": "ENST00000416754.1",
"bioType": "protein_coding",
"intron": 2,
"geneId": "ENSG00000159216",
"hgnc": "RUNX1",
"hgvsr": "ENST00000416754.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",
"directionality":"uniDirectional"
}
],
"isCanonical": true,
"proteinId": "ENSP00000379658.3"
},
{
"transcript": "NM_001987.4",
"source": "RefSeq",
"bioType": "protein_coding",
"introns": "5/7",
"geneId": "2120",
"hgnc": "ETV6",
"consequence": [
"transcript_variant",
"unidirectional_gene_fusion"
],
"geneFusions": [
{
"transcript": "NM_001754.4",
"bioType": "protein_coding",
"intron": 2,
"geneId": "861",
"hgnc": "RUNX1",
"hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?",
"directionality":"uniDirectional"
}
],
"isCanonical": true,
"proteinId": "NP_001978.1"
}
]
}
]
}
FieldTypeNotes
transcriptstringtranscript ID
bioTypestringdescriptions of the biotypes from Ensembl
exonintexon that contained fusion breakpoint
intronintintron that contained fusion breakpoint
geneIdstringgene ID. e.g. ENSG00000116062
hgncstringgene symbol. e.g. MSH6
hgvsrstringHGVS RNA nomenclature

Gene Fusion Data Sources

To provide more context to our gene fusions, we provide the following gene fusion data sources:

Consequences

When a gene fusion is identified, we add the following Sequence Ontology consequence:

              "consequence": [
"transcript_variant",
"gene_fusion"
],
  • If both transcripts have the same orientation, we label it as unidirectional_gene_fusion, if they have different orientations, we label it as bidirectional_gene_fusion
  • If both unidirectional and bidirectional ones are detected, we label it as gene_fusion.

Gene Fusions Section

The geneFusions section is contained within the object of the originating transcript. It will contain all the pairwise gene fusions that obey the criteria outlined above. In the case of ENST00000396373.4, there are 7 other Ensembl transcripts that would produce a gene fusion. For NM_001987.4, there was only one transcript (NM_001754.4) that produces a gene fusion.

For each originating transcript, we report the following for each partner transcript:

  • transcript ID
  • gene ID
  • HGNC gene symbol
  • transcript bio type (e.g. protein_coding)
  • intron or exon number containing the breakpoint
  • HGVS RNA notation
  • gene fusion directionality
tip

Before Nirvana 3.15, we provided HGVS coding notation. However, HGVS r. notation is more appropriate for these types of fusion splicing events (see HGVS SVD-WG007).

          "geneFusions": [
{
"transcript": "NM_001754.4",
"bioType": "protein_coding",
"intron": 2,
"geneId": "861",
"hgnc": "RUNX1",
"hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?",
"directionality":"uniDirectional"
}
],

The HGVS RNA notation above indicates that the gene fusion starts with NM_001754.4 (RUNX1) until CDS position 58 and continues with NM_001987.4 (ETV6). 1009+3367 indicates that the fusion occurred 3367 bp within intron 2.

- - - - \ No newline at end of file diff --git a/3.21/core-functionality/mnv-recomposition/index.html b/3.21/core-functionality/mnv-recomposition/index.html deleted file mode 100644 index 3e8034e0..00000000 --- a/3.21/core-functionality/mnv-recomposition/index.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - -MNV Recomposition | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

MNV Recomposition

Overview

Most annotation tools handle variants independently. The problem with this approach is that nearby variants could affect the same codon leading to a very different annotation. For example, consider the following example (Danecek, 2017):

When handled independently, the two variants (C→T & G→A) would be annotated as missense annotations. However, if we consider them together, the resulting MNV would yield a stop gain.

By default, Nirvana identifies these types of cases where two or more SNVs would affect the same codon. In addition, it's able to perform this operation on VCFs containing large numbers of samples (we've tested this on 2,500+ samples using the 1000 Genomes Project VCF files).

Publication

Petr Danecek, Shane A McCarthy, BCFtools/csq: haplotype-aware variant consequences, Bioinformatics, Volume 33, Issue 13, 1 July 2017, Pages 2037–2039

Supported variant types

At the moment, Nirvana only supports recomposing multiple SNVs into an MNV. The Danecek paper makes a compelling case for supporting frameshifting variants paired with frame-restoring variants. We've also received requests for supporting the recomposition of an SNV with insertions and deletions. While this is something we've looked into, it represents functionality that many of our clinical customers are not yet comfortable with.

Criteria

Nirvana will recompose a set of SNVs if two or more SNVs are located in the same codon for any codon in any of the overlapping transcripts.

The following criteria must also be met for at least one sample:

  1. Genotypes are provided for the VCF variants and all variants are in phase or homozygous variant.
  2. All the available phase set IDs are the same (homozygous variants are available to all phase sets)
  3. The genotype ploidy for all the variants is the same.
  4. No unsupported variant type (i.e. insertion or deletion) overlaps the recomposed variants
  5. The first and last base in at least one of the recomposed alleles must be non-reference.

Examples

During variant recomposition, if two SNVs affect the same codon, it becomes the seed codon. If there are SNVs in the adjacent codons, they will be aggregated into the seed codon.

  • Three SNVs in two adjacent codons. The recomposed alternate allele is ATAG: -

  • Three SNVs in two adjacent codons (larger distance). The recomposed alternate allele is ATATCC: -

  • Nirvana can use multiple reading frames to aggregate the seed codon. In this example, the seed codon is highlighted in green. If we look at reading frame 1, we see that the T→A variant occurs in the ACT codon. The adjacent codon to the left also has a variant C→T. As a result, there can be up to four bases between SNVs when aggregating the flanking codons. The recomposed alternate allele is TTCACATAGCACTCAC: -

  • Nothing will be recomposed if there's no seed codon: -

Multiple Samples

Recomposing variants while handling multiple samples can be complex. The recomposition criteria described above often lead to sample-specific recomposed variants. Here we show the recomposition of three variants with sample-specific criteria marked in bold:

POSREFALTSample 1Sample 2Sample 3
Decomposed Variant 1100AC0|10|11|1
Decomposed Variant 2101CG0/11|10|0
Decomposed Variant 3102TA1|1.0|1
Recomposed Variant 1100ACAG, CG.1|2.
Recomposed Variant 2100ACTCCT, CCA..1|2

In the example above, the heterozygous genotype in sample 1 at position 101 would prevent the MNVs from being recomposed. Similarly, the unknown genotype for sample 2 at position 102 would produce a smaller MNV than the one expressed for sample 3.

Phase Sets

Homozygous variants, same phase set

Recomposed phase set becomes . since homozygous variants belong to all phase sets.

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT1|1567
Decomposed Variant 2101CG1|1567
Recomposed Variant100ACTG1|1.

Mixing phased and unphased variants

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT0|1567
Decomposed Variant 2101CG1/1.
Recomposed Variant100ACAG,TG1|2567

Variants in different phase sets

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT0|1567
Decomposed Variant 2101CG1|1890
Recomposed Variant100ACAG,TG1|2.

Unphased homozygous variants

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT1/1.
Decomposed Variant 2101CG1/1.
Recomposed Variant100ACTG1/1.

Homozygous variants are not commutative

POSREFALTGenotypePhase Set
Decomposed Variant 1100AT0|1567
Decomposed Variant 2101CG1|1567
Decomposed Variant 3102GT0|1890

In this example, the homozygous variant at position 101 cannot bridge the gap between the other two variants since there could be a switching error between phase sets 567 & 890. As a result, we have to create two overlapping MNVs:

POSREFALTGenotypePhase Set
Recomposed Variant 1100ACAG, TG1|2567
Recomposed Variant 2101CGGG, GT1|2890

Conflicting Genotypes

JSON Output

Given the following VCF entries:

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  S1  S2  S3
chr1 12861477 . T C . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477
chr1 12861478 . G A . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477

Each original variant would be annotated as usual. The difference is that both will now have an isDecomposedVariant flag set to true in addition to an entry in the linkedVids field that points to the new MNV:

{
"chromosome":"chr1",
"position":12861477,
"refAllele":"T",
"altAlleles":[
"C"
],
"filters":[
"PASS"
],
"samples":[
{
"genotype":"0/0",
},
{
"genotype":"0/0",
},
{
"genotype":"0|1",
}
],
"variants":[
{
"vid":"1-12861477-T-C",
"chromosome":"chr1",
"begin":12861477,
"end":12861477,
"refAllele":"T",
"altAllele":"C",
"variantType":"SNV",
"isDecomposedVariant":true,
"linkedVids":[
"1-12861477-TG-CA"
],
"hgvsg":"NC_000001.11:g.12861477T>C",
"transcripts":[ ... ]
}
]
},
{
"chromosome":"chr1",
"position":12861478,
"refAllele":"G",
"altAlleles":[
"A"
],
"filters":[
"PASS"
],
"samples":[
{
"genotype":"0/0",
},
{
"genotype":"0/0",
},
{
"genotype":"0|1",
}
],
"variants":[
{
"vid":"1-12861478-G-A",
"chromosome":"chr1",
"begin":12861478,
"end":12861478,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"isDecomposedVariant":true,
"linkedVids":[
"1-12861477-TG-CA"
],
"hgvsg":"NC_000001.11:g.12861478G>A",
"transcripts":[ ... ]
}
]
}

The recomposed variant gets a separate entry where the isRecomposedVariant flag is set to true and the linkedVids field links to the constituent SNVs:

    {
"chromosome": "chr1",
"position": 12861477,
"refAllele": "TG",
"altAlleles": [
"CA"
],
"filters": [
"PASS"
],
"cytogeneticBand": "1p36.21",
"samples": [
{
"genotype": "0|0"
},
{
"genotype": "0|0"
},
{
"genotype": "0|1"
}
],
"variants": [
{
"vid": "1-12861477-TG-CA",
"chromosome": "chr1",
"begin": 12861477,
"end": 12861478,
"refAllele": "TG",
"altAllele": "CA",
"variantType": "MNV",
"isRecomposedVariant": true,
"linkedVids": [
"1-12861477-T-C",
"1-12861478-G-A"
],
"hgvsg": "NC_000001.11:g.12861477_12861478inv",
"transcripts":[ ... ]
}
]
},
Recomposed QUAL, FILTER, and GQ

Although the example above does not demonstrate it, Nirvana tries to set the quality score, filter, and genotype quality (GQ) for the recomposed variant. The QUAL score is calculated to be the minimum QUAL score for all the constituent SNVs. The same method is used for the genotype quality (GQ) scores. For the filters field, PASS will be used if all constituent variants passed their filters, otherwise we set it to FilteredVariantsRecomposed.

- - - - \ No newline at end of file diff --git a/3.21/core-functionality/variant-ids/index.html b/3.21/core-functionality/variant-ids/index.html deleted file mode 100644 index b2135eca..00000000 --- a/3.21/core-functionality/variant-ids/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Variant IDs | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Variant IDs

Overview

Many downstream tools use a variant identifier to store annotation results. We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute.

The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. Our VID scheme attempts to fill that gap.

Conventions
  • all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)
  • for a reference variant (i.e. no alt allele), replace the period (.) with the reference base
  • padding bases are used, neither the reference nor alternate allele can be empty
  • some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base

Small Variants

VCF Examples

chr1    66507   .   T   A   184.45  PASS    .
chr1 66521 . T TATATA 144.53 PASS .
chr1 66572 . GTA G,GTACTATATATTATA 45.45 PASS .

Format

chromosomepositionreference allelealternate allele

VID Examples

  • 1-66507-T-A
  • 1-66521-T-TATATA
  • 1-66572-GTA-G
  • 1-66572-G-GTACTATATATTA

Translocation Breakends

VCF Example

chr1    2617277 .   A   AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[  .   PASS    SVTYPE=BND

Format

chromosomepositionreference allelealternate allele

VID Example

  • 1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[

All Other Structural Variants

VCF Examples

chr1    1000    .   G   <ROH>   .   PASS    END=3001000;SVTYPE=ROH
chr1 1350082 . G <DEL> . PASS END=1351320;SVTYPE=DEL
chr1 1477854 . C <DUP:TANDEM> . PASS END=1477984;SVTYPE=DUP
chr1 1477968 . T <INS> . PASS END=1477968;SVTYPE=INS
chr1 1715898 . N <DUP> . PASS SVTYPE=CNV;END=1750149
chr1 2650426 . N <DEL> . PASS SVTYPE=CNV;END=2653074
chr2 321682 . T <INV> . PASS SVTYPE=INV;END=421681
chr20 2633403 . G <STR2> . PASS END=2633421

Format

chromosomepositionend positionreference allelealternate alleleSVTYPE

VID Examples

  • 1-1000-3001000-G-<ROH>-ROH
  • 1-1350082-1351320-G-<DEL>-DEL
  • 1-1477854-1477984-C-<DUP:TANDEM>-DUP
  • 1-1477968-1477968-T-<INS>-INS
  • 1-1715898-1750149-A-<DUP>-CNV (replace the N with A)
  • 1-2650426-2653074-N-<DEL>-CNV (keep the N)
  • 2-321682-421681-T-<INV>-INV
  • 20-2633403-2633421-G-<STR2>-STR
- - - - \ No newline at end of file diff --git a/3.21/data-sources/1000Genomes-snv-json/index.html b/3.21/data-sources/1000Genomes-snv-json/index.html deleted file mode 100644 index 0d3d271b..00000000 --- a/3.21/data-sources/1000Genomes-snv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-snv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

1000Genomes-snv-json

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Admixed American super population. Range: 0 - 1.0
amrAcintallele count for the Admixed American super population. Integer.
amrAnintallele number for the Admixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.
- - - - \ No newline at end of file diff --git a/3.21/data-sources/1000Genomes-sv-json/index.html b/3.21/data-sources/1000Genomes-sv-json/index.html deleted file mode 100644 index cfc3a77e..00000000 --- a/3.21/data-sources/1000Genomes-sv-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -1000Genomes-sv-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

1000Genomes-sv-json

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Admixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.
- - - - \ No newline at end of file diff --git a/3.21/data-sources/1000Genomes/index.html b/3.21/data-sources/1000Genomes/index.html deleted file mode 100644 index ac41ebfd..00000000 --- a/3.21/data-sources/1000Genomes/index.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - -1000 Genomes | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

1000 Genomes

Overview

The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases.

Publication

Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. Nature 526, 75–81 (2015). https://doi.org/10.1038/nature15394

Populations

Small Variants

VCF File Parsing

The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. Our team converted the original data to VCF entries with allele counts and allele numbers like the following.

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633

The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored).

We parse the VCF file and extract the following fields from INFO:

  • AA
  • AC
  • AN
  • EAS_AN
  • AMR_AN
  • AFR_AN
  • EUR_AN
  • SAS_AN
  • EAS_AC
  • AMR_AC
  • AFR_AC
  • EUR_AC
  • SAS_AC

Conflict Resolution

We have observed conflicting allele frequency information in the source. Take the following example:

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;
1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;

That is, the variant 1-20505705-C-CTG has conflicting entries. To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX.

Chromosome# of alleles# of conflicting allelespercentage
chrX83480027330.33%
Total2141309827430.013%

Currently, we remove the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep the allele frequencies of all other alleles in the VCF line.

Potential Alternate Solutions

  • Remove all alleles that are contained in the VCF lines which have a conflicting allele. (Recommended by Holly Zheng-Bradley of the 1000 Genomes group, 7/29/2015)
  • Recalculate the allele frequency for the conflicting allele.
  • Pick the allele frequency that has the highest data support.

Download URL

GRCh37 -GRCh38

JSON Output

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Admixed American super population. Range: 0 - 1.0
amrAcintallele count for the Admixed American super population. Integer.
amrAnintallele number for the Admixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.

Structural Variants

VCF File Parsing

The VCF files contain entries like the following:

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A <CN0>,<CN2>,<CN3>,<CN4> 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4

Please note that CNVs are allele-specific. For example, HG00096 is effectively copy number 4, which would be a net gain on chr22.

1000 Genomes contains 5 types of structural variants:

  • CNV
  • DEL
  • DUP
  • INS
  • INV

Since the 1000 Genomes data is provided in VCF format, we assume that the coordinates follow the VCF convention, i.e., there is a padding base for symbolic alleles. So every interval can be interpreted as [BEGIN+1, END]. Similarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below.

Insertion issues

  • END = BEGIN for 6/165
  • END = BEGIN+2 for 93/165
  • END = BEGIN+3 for 11/165
  • END = BEGIN+4 for 11/165
  • END – BEGIN ranges from 5 to 1156 for the others.

Converting VCF svTypes to SO sequence alterations

The svType will be captured in our JSON file under the sequenceAlteration key. Here's the translation we'll use according to svType in 1000 Genomes.

svTypeAlternative Alleles contain <CN*>sequenceAlteration
ALUFALSEmobile_element_insertion
DUPTRUEcopy_number_gain
CNVTRUEcopy_number_gain (observed_gains >0 and observed_losses =0)
copy_number_loss (observed_gains = 0 and observed_losses > 0)
copy_number_variation (otherwise)
DELTRUEcopy_number_loss
LINE1FALSEmobile_element_insertion
SVAFALSEmobile_element_insertion
INVFALSEinversion
INSFALSEinsertion

Exceptions

We discard structural variants without END

#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103
21 9495848 esv3646347 A <INS:ME:LINE1> 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0

CNVs in chrY

  • No other types of structural variants exist in chrY
  • Since the copy number is provided in the genotype field, we directly parse the copy number from the "CN" field.
  • For most CNVs in chrY, the reference copy number is 1, but the reference copy number for CNVs in segmental duplication sites is 2 (<CN2> in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.
#CHROM  POS     ID      REF     ALT     QUAL    FILTER  INFO    FORMAT  HG00096 HG00101 HG00103 HG00105 HG00107 HG00108
Y 2888555 CNV_Y_2888555_3014661 T <CN2> 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394
Y 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C <CN1>,<CN3> 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99

JSON Output

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Admixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.
- - - - \ No newline at end of file diff --git a/3.21/data-sources/amino-acid-conservation-json/index.html b/3.21/data-sources/amino-acid-conservation-json/index.html deleted file mode 100644 index 8d8f0e24..00000000 --- a/3.21/data-sources/amino-acid-conservation-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -amino-acid-conservation-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

amino-acid-conservation-json

"aminoAcidConservation": {
"scores": [0.34]
}
FieldTypeNotes
aminoAcidConservationobject
scoresobject array of doublespercent conserved with respect to human amino acid residue. Range: 0.01 - 1.00
- - - - \ No newline at end of file diff --git a/3.21/data-sources/amino-acid-conservation/index.html b/3.21/data-sources/amino-acid-conservation/index.html deleted file mode 100644 index fb924e7e..00000000 --- a/3.21/data-sources/amino-acid-conservation/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -Amino Acid Conservation | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Amino Acid Conservation

Overview

Amino acid conservation scores are obtained from multiple alignments of vertebrate exomes to the human ones. The scores indicate the frequency with which a particular AA is observed in Humans.

Publication

Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. Genome Res. 2005 Aug;15(8):1034-50. (http://www.genome.org/cgi/doi/10.1101/gr.3715005)

FASTA File

The exon alignments are provided in FASTA files as follows:

>ENST00000641515.2_hg38_1_2 3 0 0 chr1:65565-65573+
MKK
>ENST00000641515.2_panTro4_1_2 3 0 0 chrUn_GL393541:146907-146915+
MKK
>ENST00000641515.2_gorGor3_1_2 3 0 0
---
>ENST00000641515.2_ponAbe2_1_2 3 0 0 chr15:99141417-99141425-
MKK
>ENST00000641515.2_hg38_2_2 324 0 0 chr1:69037-70008+
VTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ
>ENST00000641515.2_panTro4_2_2 324 0 0 chrUn_GL393541:151333-152303+

Parsing FASTA

For each Ensembl transcript, we will need to aggregate all the exons together for each of the 100 species. From there, we should get a full alignment that can be used to determine conservation. For example, for ENST00000641515.2 we have:

Human (hg38) MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Chimp MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFL-MLFFVFYGGIVFGNLLIVRIVVSDSHLHSPMYFLLANLSLIDLSLCSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Gorilla ----------------------------------------------------------------------------------------------------------------------
Orangutan MKKVTAEAISWNESTSKTNNSVVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVIIVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL
Gibbon ----------------------------------------------------------------------------------------------------------------------
Rhesus MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVVDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL
Macaque MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVIDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL

If we look at position 6, we see that humans have an Alanine (A) residue. This residue is shared by Chimp and Orangutan. However, Rhesus and Macaque have a Glutamic acid (E) residue at that position. Moreover, Gorilla and Gibbon don't even have data for that transcript. For position 6, we would say that we have 43% conservation (3/7) since three of the seven organisms (Human, Chimp and Orangutan) carry the human residue.

Assigning scores to Nirvana transcripts

The source FASTA file comes with Ensembl/UCSC transcript ids of the transcripts used for alignments. The Nirvana cache has RefSeq and Ensembl transcripts and our first attempt was to map the given Ensembl/UCSC ids to their equivalent RefSeq/Ensembl ids. This attempt was unsuccessful since UCSC Table Browser provided mapping without version numbers. So we proceeded as follows:

  • Take proteins which have a unique mapping (and hence one set of conservation scores). For ones that mapped to both ChrX and ChrY, we accepted the one from ChrX.
  • A Nirvana transcript having an exact peptide sequence match with a uniquely aligned protein is assigned the corresponding conservation scores.

Unfortunately this left us with a very small number of transcripts having conservation scores.

GRCh37

  • Source FASTA contained 41957 protein alignments.
  • 38165 proteins had unique scores.
  • 88 aligned proteins existed in Nirvana cache.
  • 118 transcripts had conservation scores.

GRCh38

  • Source FASTA contained 110024 protein alignments.
  • 88961 proteins had unique scores.
  • 11688 aligned proteins existed in Nirvana cache.
  • 12098 transcripts had conservation scores.

Download URL

GRCh37: http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz

GRCh38: http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz

JSON Output

Conservation scores are reported in the transcript section. One score is reported for each alt allele

"aminoAcidConservation": {
"scores": [0.34]
}
FieldTypeNotes
aminoAcidConservationobject
scoresobject array of doublespercent conserved with respect to human amino acid residue. Range: 0.01 - 1.00
- - - - \ No newline at end of file diff --git a/3.21/data-sources/cancer-hotspots/index.html b/3.21/data-sources/cancer-hotspots/index.html deleted file mode 100644 index c9808b5d..00000000 --- a/3.21/data-sources/cancer-hotspots/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -Cancer Hotspots | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Cancer Hotspots

Overview

Cancer Hotspots is a resource for statistically significant mutations in cancer. It provides information about statistically significantly recurrent mutations identified in large scale cancer genomics data.

Publication

Chang MT, Bhattarai TS, Schram AM, Bielski CM, Donoghue MTA, Jonsson P, Chakravarty D, Phillips S, Kandoth C, Penson A, Gorelick A, Shamu T, Patel S, Harris C, Gao J, Sumer SO, Kundra R, Razavi P, Li BT, Reales DN, Socci ND, Jayakumaran G, Zehir A, Benayed R, Arcila ME, Chandarlapaty S, Ladanyi M, Schultz N, Baselga J, Berger MF, Rosen N, Solit DB, Hyman DM, Taylor BS. Accelerating Discovery of Functional Mutant Alleles in Cancer. Cancer Discov. 2018 Feb;8(2):174-183. doi: 10.1158/2159-8290.CD-17-0321. Epub 2017 Dec 15. PMID: 29247016; PMCID: PMC5809279.

Chang MT, Asthana S, Gao SP, Lee BH, Chapman JS, Kandoth C, Gao J, Socci ND, Solit DB, Olshen AB, Schultz N, Taylor BS. Identifying recurrent mutations in cancer reveals widespread lineage diversity and mutational specificity. Nat Biotechnol. 2016 Feb;34(2):155-63. doi: 10.1038/nbt.3391. Epub 2015 Nov 30. PMID: 26619011; PMCID: PMC4744099.

Data extraction

Nirvana currently parses the SNV and indel tabs from the hotspots_v2.xls file to extract the relevant content.

Example

SNV

Hugo_Symbol     Amino_Acid_Position     log10_pvalue    Mutation_Count  Reference_Amino_Acid    Total_Mutations_in_Gene Median_Allele_Freq_Rank Allele_Freq_Rank        Variant_Amino_Acid   Codon_Change     Genomic_Position        Detailed_Cancer_Types   Organ_Types     Tri-nucleotides Mutability      mu_protein      Total_Samples   Analysis_Type   qvalue  tm      qvalue_pancanIs_repeat        seq     length  align100        pad12entropy    pad24entropy    pad36entropy    TP      reason  n_MSK   n_Retro judgement       inNBT   inOncokb        ref     qvaluect     ct       Samples
NRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135
549872122762:0.172113289760349:0.0240963855421687:0.0620767494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.3142
85714285714 R:204 cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:88|thyroid:54|blood:15|bowel:8|testis:5|biliarytract:4|bladder:4|lung:4|ovaryfallopiantube:4|softtissue:3|unk:3|uterus:3|cnsbrain:2|esophagusstomach:2|headandneck:2|bone:1|pancreas:1|thymus:1
NRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135
549872122762:0.172113289760349:0.0240963855421687:0.0620767494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.3142
85714285714 K:142 cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:62|bowel:18|thyroid:17|blood:12|softtissue:6|lung:5|unk:5|bladder:3|cnsbrain:2|thymus:2|adrenalgland:1|biliarytract:1|esophagusstomach:1|headandneck:1|kidney:1|liver:1|ovaryfallopiantube:1|pancreas:1|testis:1|uterus:1
NRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135
549872122762:0.172113289760349:0.0240963855421687:0.0620767494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.3142
85714285714 L:46 cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:24|bowel:7|lung:6|blood:2|cnsbrain:2|unk:2|bladder:1|softtissue:1|uterus:1
NRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135
549872122762:0.172113289760349:0.0240963855421687:0.0620767494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.3142
85714285714 H:27 cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:12|blood:7|bowel:2|lung:2|testis:2|softtissue:1|unk:1

Indel

Hugo_Symbol     Amino_Acid_Position     log10_pvalue    Mutation_Count  Reference_Amino_Acid    Total_Mutations_in_Gene Median_Allele_Freq_Rank Allele_Freq_Rank        SNP_ID  Variant_Amino_Acid    Codon_Change    Genomic_Position        Detailed_Cancer_Types   Organ_Types     Tri-nucleotides Mutability      mu_protein      ccf     Total_Samples   indel_size      qvalue  tm   Is_repeat        seq     length  align100        pad12entropy    pad24entropy    pad36entropy    TP      reason  n_MSK   n_Retro judgement       inNBT   inOncokb        Samples
SMARCA4 546 -7.75235638169585 5 QK:5 101 NA NA :NA K546del:5 cAGAag/cag:5 19:11106926_5 lgg:536:4|dlbcl:246:1 cnsbrain:2283:4|lymph:366:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 1 0.000230672905611517 SMARCA4 546 FALSE NA NA 1 0.91489630957268 1.2950060272429 1.33965330506364 FALSE LOCAL_ENTROPY 1 4 RETAIN FALSE FALSE cnsbrain:4|lymph:1
CDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA V28_E33del:4 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 1 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE cervix:1|esophagusstomach:1|lung:1|pancreas:1
CDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA L32_L37del:3 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 1 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE skin:2|esophagusstomach:1
CDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA A36_N39delinsD:1 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE lung:1

Parsing

From the file, we're mainly interested in the following columns:

  • Hugo_Symbol
  • Amino_Acid_Position
  • Mutation_Count
  • Reference_Amino_Acid
  • Variant_Amino_Acid
  • qvalue

We map the gene symbol onto the canonical transcripts (RefSeq & Ensembl) for that gene. For SNVs, we obtain the position and the reference and alternate amino acids from the source file and generate substitution notation. For indels, we take the protein change notation from the Variant_Amino_Acid column (e.g. K546del). We then match each entry using these notations.

caution

We currently skip all variants labeled as splice in the source file.

JSON Output

The data source will be captured under the cancerHotspots key in the transcript section.

{
"transcript":"NM_002524.5",
"source":"RefSeq",
"bioType":"mRNA",
"aminoAcids":"Q/K",
"proteinPos":"61",
"geneId":"4893",
"hgnc":"NRAS",
"hgvsc":"NM_002524.5:c.181C>A",
"hgvsp":"NP_002515.1:p.(Gln61Lys)",
"isCanonical":true,
"proteinId":"NP_002515.1",
"cancerHotspots":{
"residue":"Q61",
"numSamples":422,
"numAltAminoAcidSamples":142,
"qValue":0
}
}
Field | Type | Notes
residue | string |
numSamples | int | how many samples are associated with a variant at the same amino acid position
numAltAminoAcidSamples | int | how many samples are associated with a variant with the same position and alternate amino acid
qValue | double |
- - - - \ No newline at end of file diff --git a/3.21/data-sources/clingen-dosage-json/index.html b/3.21/data-sources/clingen-dosage-json/index.html deleted file mode 100644 index 1f3d11f5..00000000 --- a/3.21/data-sources/clingen-dosage-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-dosage-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

clingen-dosage-json

"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
Field | Type | Notes
clingenDosageSensitivityMap | object array |
chromosome | string | Ensembl-style chromosome names
begin | integer | 1-based position
end | integer | 1-based position
haploinsufficiency | string | see possible values below
triplosensitivity | string | (same as haploinsufficiency)
reciprocalOverlap | floating point | Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).
annotationOverlap | floating point | Range: 0 - 1. E.g. 0.57 would indicate that 57% of the annotation is overlapped by the variant. Specified up to 5 decimal places (Not reported for Insertions).

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely
- - - - \ No newline at end of file diff --git a/3.21/data-sources/clingen-gene-validity-json/index.html b/3.21/data-sources/clingen-gene-validity-json/index.html deleted file mode 100644 index ba410309..00000000 --- a/3.21/data-sources/clingen-gene-validity-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-gene-validity-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

clingen-gene-validity-json

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
Field | Type | Notes
clingenGeneValidity | object |
diseaseId | string | Monarch Disease Ontology ID (MONDO)
disease | string | disease label
classification | string | see below for possible values
classificationDate | string | yyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
  • no known disease relationship
- - - - \ No newline at end of file diff --git a/3.21/data-sources/clingen-json/index.html b/3.21/data-sources/clingen-json/index.html deleted file mode 100644 index b9c7ac24..00000000 --- a/3.21/data-sources/clingen-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clingen-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

clingen-json

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
Field | Type | Notes
clingen | object array |
chromosome | string | Ensembl-style chromosome names
begin | integer | 1-based position
end | integer | 1-based position
variantType | string | Any of the sequence alterations defined here.
id | string | Identifier from the data source. Alternatively a VID
clinicalInterpretation | string | see possible values below
observedGains | integer | Range: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
observedLosses | integer | Range: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
validated | boolean |
phenotypes | string array | Description of the phenotype.
phenotypeIds | string array | Description of the phenotype IDs.
reciprocalOverlap | floating point | Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain
- - - - \ No newline at end of file diff --git a/3.21/data-sources/clingen/index.html b/3.21/data-sources/clingen/index.html deleted file mode 100644 index 9b62737c..00000000 --- a/3.21/data-sources/clingen/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -ClinGen | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

ClinGen

Overview

ClinGen is a National Institutes of Health (NIH)-funded resource dedicated to building a central resource that defines the clinical relevance of genes and variants for use in precision medicine and research.

Publication

Heidi L. Rehm, Ph.D., Jonathan S. Berg, M.D., Ph.D., Lisa D. Brooks, Ph.D., Carlos D. Bustamante, Ph.D., James P. Evans, M.D., Ph.D., Melissa J. Landrum, Ph.D., David H. Ledbetter, Ph.D., Donna R. Maglott, Ph.D., Christa Lese Martin, Ph.D., Robert L. Nussbaum, M.D., Sharon E. Plon, M.D., Ph.D., Erin M. Ramos, Ph.D., Stephen T. Sherry, Ph.D., and Michael S. Watson, Ph.D., for ClinGen. ClinGen The Clinical Genome Resource. N Engl J Med 2015; 372:2235-2242 June 4, 2015 DOI: 10.1056/NEJMsr1406261.

ISCA Regions

TSV Extraction

ClinGen contains only copy number variants. Since the coordinates in the original ClinGen file follow the same convention as the BED format (0-based start, end-exclusive), the coordinates had to be adjusted to [BEGIN+1, END].

#bin    chrom   chromStart      chromEnd        name    score   strand  thickStart      thickEnd        attrCount       attrTags        attrVals
nsv530705 1 564405 8597804 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv530706 1 564424 3262790 0 1 copy_number_loss pathogenic False Abnormal facial shape,Abnormality of cardiac morphology,Global developmental delay,Muscular hypotonia HP:0001252,HP:0001263,HP:0001627,HP:0001999,MedGen:CN001147,MedGen:CN001157,MedGen:CN001482,MedGen:CN001810
nsv530707 1 564424 7068738 0 1 copy_number_loss pathogenic False Abnormality of cardiac morphology,Cleft upper lip,Failure to thrive,Global developmental delay,Intrauterine growth retardation,Microcephaly,Short stature HP:0000204,HP:0000252,HP:0001263,HP:0001508,HP:0001511,HP:0001627,HP:0004322,MedGen:C0349588,MedGen:C1845868,MedGen:C1853481,MedGen:C2364119,MedGen:CN000197,MedGen:CN001157,MedGen:CN001482
nsv533512 1 564435 649748 0 1 copy_number_loss benign False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv931338 1 714078 4958499 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes
nsv530300 1 728138 5066371 1 0 copy_number_gain pathogenic False Abnormality of cardiac morphology,Cleft palate,Global developmental delay HP:0000175,HP:0001263,HP:0001627,MedGen:C2240378,MedGen:CN001157,MedGen:CN001482

Status levels

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain

Parsing

We parse the ClinGen tsv file and extract the following:

  • chrom
  • chromStart (note that this is a 0-based coordinate)
  • chromEnd
  • attrTags
  • attrVals

attrTags and attrVals are comma separated lists. attrTags contains the field keys and attrVals contains the field values. We will parse the following keys from the two fields:

  • parent (this will be used as the ID in our JSON output)
  • clinical_int
  • validated
  • phenotype (this should be a string array)
  • phenotype_id (this should be a string array)

Observed losses and observed gains will be calculated from entries that share a common parent ID.

  • variants with a common parent ID and same coordinates are grouped
    • calculated observed losses, observed gains for each group
    • Clinical significance and validation status are collapsed using the priority strategy described below
  • Variants with the same parent ID can have different coordinates (mapped to hg38)
    • nsv491508 : chr14:105583663-106881350 and chr14:105605043-106766076 (only one example)
    • we kept both variants

Conflict Resolution

Clinical significance priority

When there is a mixture of variants belonging to the same parent ID, we will choose the most pathogenic clinical significance from the available values. For example, if 3 samples were deemed pathogenic and 2 samples were deemed likely pathogenic, we would list the variant as pathogenic.

Priority (high to low)

  • Priority
  • Pathogenic
  • Likely pathogenic
  • Benign
  • Likely benign
  • Uncertain significance

Validation Priority

When there is a mixture of variants belonging to the same parent ID, we will set the validation status to true if any of the variants were validated.

Download URL

https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite

JSON Output

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
Field | Type | Notes
clingen | object array |
chromosome | string | Ensembl-style chromosome names
begin | integer | 1-based position
end | integer | 1-based position
variantType | string | Any of the sequence alterations defined here.
id | string | Identifier from the data source. Alternatively a VID
clinicalInterpretation | string | see possible values below
observedGains | integer | Range: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
observedLosses | integer | Range: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
validated | boolean |
phenotypes | string array | Description of the phenotype.
phenotypeIds | string array | Description of the phenotype IDs.
reciprocalOverlap | floating point | Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain

Dosage Sensitivity Map

The Clinical Genome Resource (ClinGen) consortium is curating genes and regions of the genome to assess whether there is evidence to support that these genes/regions are dosage sensitive and should be targeted on a cytogenomic array. Nirvana reports these annotations for overlapping SVs.

Publication

Riggs ER, Nelson T, Merz A, Ackley T, Bunke B, Collins CD, Collinson MN, Fan YS, Goodenberger ML, Golden DM, Haglund-Hazy L, Krgovic D, Lamb AN, Lewis Z, Li G, Liu Y, Meck J, Neufeld-Kaiser W, Runke CK, Sanmann JN, Stavropoulos DJ, Strong E, Su M, Tayeh MK, Kokalj Vokac N, Thorland EC, Andersen E, Martin CL. Copy number variant discrepancy resolution using the ClinGen dosage sensitivity map results in updated clinical interpretations in ClinVar. Hum Mutat. 2018 Nov;39(11):1650-1659. doi: 10.1002/humu.23610. PMID: 30095202; PMCID: PMC7374944.

TSV Source files

Regions

#ClinGen Region Curation Results
#07 May,2019
#Genomic Locations are reported on GRCh38 (hg38): GCF_000001405.36
#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen
#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_region.cgi?id=key
#ISCA ID ISCA Region Name cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID
ISCA-46299 Xp11.22 region (includes HUWE1) Xp11.22 tbd 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 22840365 20655035 26692240 2018-11-19
ISCA-46295 15q13.3 recurrent region (D-CHRNA7 to BP5) (includes CHRNA7 and OTUD7A) 15q13.3 chr15:31727418-32153204 3 Sufficient evidence for dosage pathogenicity 19898479 20236110 22775350 40 Dosage sensitivity unlikely 26968334 22420048 2018-05-10
ISCA-46291 7q11.23 recurrent distal region (includes HIP1, YWHAG) 7q11.23 chr7:75528718-76433859 2 Some evidence for dosage pathogenicity 21109226 16971481 1 Little evidence for dosage pathogenicity 21109226 27867344 2018-12-31
ISCA-46290 Xp11.22p11.23 recurrent region (includes SHROOM4) Xp11.22-p11.23 chrX: 48447780-52444264 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 19716111 21418194 25425167 2017-12-14 300801

Genes

#ClinGen Gene Curation Results
#24 May,2019
#Genomic Locations are reported on GRCh37 (hg19): GCF_000001405.13
#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen
#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_gene.cgi?sym=Gene Symbol
#Gene Symbol Gene ID cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID
A4GALT 53947 22q13.2 chr22:43088121-43117307 30 Gene associated with autosomal recessive phenotype 0 No evidence available 2014-12-11 111400
AAGAB 79719 15q23 chr15:67493013-67547536 3 Sufficient evidence for dosage pathogenicity 23064416 23000146 0 No evidence available 2013-02-28 148600

Dosage Rating System

Rating | Possible Clinical Interpretation
0 | No evidence to suggest that dosage sensitivity is associated with clinical phenotype
1 | Little evidence suggesting dosage sensitivity is associated with clinical phenotype
2 | Emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
3 | Sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
30 | Gene associated with autosomal recessive phenotype
40 | Dosage sensitivity unlikely

Reference: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml

Download URL

ftp://ftp.clinicalgenome.org/

JSON Output

"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
FieldTypeNotes
clingenDosageSensitivityMapobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
haploinsufficiencystringsee possible values below
triplosensitivitystring(same as haploinsufficiency) 
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).
annotationOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap. Specified up to 5 decimal places (Not reported for Insertions).

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely

Building the supplementary files

The gene dosage sensitivity .nga for Nirvana can be built using the SAUtils command's DosageSensitivity subcommand. The required data file is ClinGen_gene_curation_list_{ASSEMBLY}.tsv (url provided above) and its associated .version file.

NAME=ClinGen Dosage Sensitivity Map
VERSION=20211201
DATE=2021-12-01
DESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)

Here is a sample run:

dotnet NirvanaBuild/SAUtils.dll DosageSensitivity
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll dosagesensitivity [options]
Creates a gene annotation database from dbVar data

OPTIONS:
--tsv, -t <VALUE> input tsv file
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

dotnet NirvanaBuild/SAUtils.dll DosageSensitivity --out SupplementaryDatabase/64/GRCh37 --tsv ClinGen_gene_curation_list_GRCh37.tsv
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953
---------------------------------------------------------------------------


Time: 00:00:00.1

For building the .nsi files, we use the SAUtils command's DosageMapRegions subcommand. The required data file is ClinGen_region_curation_list_{ASSEMBLY}.tsv (url provided above) and its associated .version file.

NAME=ClinGen Dosage Sensitivity Map
VERSION=20211201
DATE=2021-12-01
DESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)

Here is a sample run:

dotnet NirvanaBuild/SAUtils.dll DosageMapRegions 
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll dosagemapregions [options]
Creates an interval annotation database from dbVar data

OPTIONS:
--tsv, -t <VALUE> input tsv file
--ref, -r <filename> input reference filename
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

dotnet NirvanaBuild/SAUtils.dll DosageMapRegions --out SupplementaryDatabase/64/GRCh37 --ref References/7/Homo_sapiens.GRCh37.Nirvana.dat --tsv ClinGen_region_curation_list_GRCh37.tsv
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953
---------------------------------------------------------------------------

Writing 505 intervals to database...

Time: 00:00:00.1

Gene-Disease Validity

The ClinGen Gene-Disease Clinical Validity curation process involves evaluating the strength of evidence supporting or refuting a claim that variation in a particular gene causes a particular disease. Nirvana reports these annotations for genes in the genes section of the JSON.

Publication

Strande NT, Riggs ER, Buchanan AH, et al. Evaluating the Clinical Validity of Gene-Disease Associations: An Evidence-Based Framework Developed by the Clinical Genome Resource. Am J Hum Genet. 2017;100(6):895-906. doi:10.1016/j.ajhg.2017.04.015

Source TSV

The source data comes in a CSV file that we convert to a TSV.

CLINGEN GENE VALIDITY CURATIONS
FILE CREATED: 2019-05-28
WEBPAGE: https://search.clinicalgenome.org/kb/gene-validity
+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++
GENE SYMBOL,GENE ID (HGNC),DISEASE LABEL,DISEASE ID (MONDO),SOP,CLASSIFICATION,ONLINE REPORT,CLASSIFICATION DATE
+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++
A2ML1,HGNC:23336,Noonan syndrome with multiple lentigines,MONDO_0007893,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/59b87033-dd91-4f1e-aec1-c9b1f5124b16--2018-06-07T14:37:47,2018-06-07T14:37:47.175Z
A2ML1,HGNC:23336,cardiofaciocutaneous syndrome,MONDO_0015280,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/fc3c41d8-8497-489b-a350-c9e30016bc6a--2018-06-07T14:31:03,2018-06-07T14:31:03.696Z
A2ML1,HGNC:23336,Costello syndrome,MONDO_0009026,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/ea72ba8d-cf62-44bc-86be-da64e3848eba--2018-06-07T14:34:05,2018-06-07T14:34:05.324Z

Download URL

https://search.clinicalgenome.org/kb/downloads#section_gene-disease-validity

Conflict Resolution

Multiple Classifications

Here is an example of multiple classifications.

$ grep MONDO_0010192 ClinGen-Gene-Disease-Summary-2019-12-02.csv  | grep EDNRB
EDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Moderate,https://search.clinicalgenome.org/kb/gene-validity/d7abbd45-7915-437b-849b-dea876bfc2f5--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z
EDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Limited,https://search.clinicalgenome.org/kb/gene-validity/73ee9727-60c1-40fd-830f-08c2b513d2ee--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z

In such cases, we select the more severe classification.

Multiple Dates

$ grep MONDO_0016419 ClinGen-Gene-Disease-Summary-2019-12-02.csv  | grep MUTYH
MUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9904,2017-05-24T00:00:00
MUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9902,2017-05-25T00:00:00

If the classifications are the same, we should select the latest classification date.

JSON Output

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
FieldTypeNotes
clingenGeneValidityobject array
diseaseIdstringMonarch Disease Ontology ID (MONDO)
diseasestringdisease label
classificationstringsee below for possible values
classificationDatestringyyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
  • no known disease relationship

Building the supplementary files

The gene disease validity .nga for Nirvana can be built using the SAUtils command's DiseaseValidity subcommand. The only required data file is Clingen-Gene-Disease-Summary-2021-12-01.tsv (url provided above) and its associated .version file.

NAME=ClinGen disease validity curations
VERSION=20211201
DATE=2021-12-01
DESCRIPTION=Disease validity curations from ClinGen (dbVar)

Here is a sample run:

 dotnet NirvanaBuild/SAUtils.dll DiseaseValidity
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll diseasevalidity [options]
Creates a gene annotation database from ClinGen gene validity data

OPTIONS:
--csv, -i <VALUE> ClinGen gene validity file path
--cache, -c <directory>
input cache directory
--ref, -r <filename> input reference filename
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

dotnet NirvanaBuild/SAUtils.dll DiseaseValidity --tsv Clingen-Gene-Disease-Summary-2021-12-01.tsv \\
--uga Cache --out SupplementaryDatabase
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953
---------------------------------------------------------------------------

Number of geneIds missing from the cache:0 (0%)

Time: 00:00:00.2
- - - - \ No newline at end of file diff --git a/3.21/data-sources/clinvar-json/index.html b/3.21/data-sources/clinvar-json/index.html deleted file mode 100644 index 07a2729a..00000000 --- a/3.21/data-sources/clinvar-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -clinvar-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

clinvar-json

small variants:

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]

large variants:

"clinvar":[
{
"chromosome":"1",
"begin":629025,
"end":8537745,
"variantType":"copy_number_loss",
"id":"RCV000051993.4",
"variationId":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"alleleOrigins":[
"not provided"
],
"phenotypes":[
"See cases"
],
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21",
"pubMedIds":[
"21844811"
]
},
{
"id":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21"
},
......
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
variantTypestringvariant type
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity
- - - - \ No newline at end of file diff --git a/3.21/data-sources/clinvar/index.html b/3.21/data-sources/clinvar/index.html deleted file mode 100644 index f34e12b6..00000000 --- a/3.21/data-sources/clinvar/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -ClinVar | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

ClinVar

Overview

ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation.

Publication

Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, Nucleic Acids Research, 46, Issue D1, 4 January 2018, Pages D1062–D1067, https://doi.org/10.1093/nar/gkx1153

RCV File

Example

Here's a full RCV entry.

Parsing

In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output.

ID

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinVarAccession Acc="RCV000000001" Version="2">
</ClinVarSet>

The Acc and Version fields are merged to form the ID (RCV000000001.2)

LastUpdatedDate

<ClinVarSet>
<ReferenceClinVarAssertion DateCreated="2012-08-13" DateLastUpdated="2016-02-17" ID="57604" >
</ClinVarSet>

Significance

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

ReviewStatus

<ClinVarSet>
<ReferenceClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>
</ClinVarSet>

Phenotypes

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="62">
<Trait Type="Disease">
<Name>
<ElementValue Type="Preferred">Joubert syndrome 9</ElementValue>
</Name>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

We only use the field with Type="Preferred". Multiple phenotypes may be reported.

Location, Variant Type and Variant Id

<ReferenceClinVarAssertion>
<GenotypeSet Type="CompoundHeterozygote" ID="424709">
<MeasureSet Type="Variant" ID="81">
<Measure Type="single nucleotide variant" ID="15120">
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38"
AssemblyStatus="current" Chr="10" Accession="NC_000010.11" start="89222510"
stop="89222510" display_start="89222510" display_stop="89222510" variantLength="1"
positionVCF="89222510" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25"
AssemblyStatus="previous" Chr="10" Accession="NC_000010.10" start="90982267"
stop="90982267" display_start="90982267" display_stop="90982267" variantLength="1"
positionVCF="90982267" referenceAlleleVCF="C" alternateAlleleVCF="T"/>
</Measure>
</MeasureSet>
</GenotypeSet>
</ReferenceClinVarAssertion>
  • The variant position is extracted from the fields for their respective assemblies.
  • Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant.
  • For older records, since the "start" and "stop" fields are not always available, we use the "display_start" and "display_stop" fields.
  • If a required allele is not available, we extract it from the reference sequence.
  • Only variants having a dbSNP id are extracted.
  • Note that a ClinVar accession may have multiple variants associated with it (possibly in different locations)
  • VariantId is extracted from the MeasureSet attributes.
  • VariantType is extracted from the Measure attributes.
    unsupported variant types

    We currently don't support the following variant types:

    • Microsatellite
    • protein only
    • fusion
    • Complex
    • Variation
    • Translocation

MedGen, OMIM, Orphanet IDs

<ReferenceClinVarAssertion>
<TraitSet Type="Disease" ID="175">
<Trait ID="3036" Type="Disease">
<XRef ID="C0086651" DB="MedGen"/>
<XRef ID="309297" DB="Orphanet"/>
<XRef ID="582" DB="Orphanet"/>
<XRef Type="MIM" ID="253000" DB="OMIM"/>
</Trait>
</TraitSet>
</ReferenceClinVarAssertion>

AlleleOrigins

<ClinVarAssertion>
<Origin>germline</Origin>
</ClinVarAssertion>

We extract Allele Origins only from Submission (SCV) entries.

PubMedIds

<ClinVarAssertion>
<ClinicalSignificance DateLastEvaluated="1996-04-01">
<Citation Type="general">
<ID Source="PubMed">12114475</ID>
</Citation>
</ClinicalSignificance>
<AttributeSet>
<Attribute Type="AssertionMethod">LMM Criteria</Attribute>
<Citation>
<ID Source="PubMed">24033266</ID>
</Citation>
</AttributeSet>
<ObservedIn>
<ObservedData ID="9727445">
<Citation Type="general">
<ID Source="PubMed">9113933</ID>
</Citation>
</ObservedData>
</ObservedIn>
<Citation Type="general">
<ID Source="PubMed">23757202</ID>
</Citation>
</ClinVarAssertion>

We extract PubMed IDs only from Submission (SCV) entries.

Parsing Significance

Extracting significance(s) may involve parsing multiple fields. Take the following snippets into consideration.

<ClinicalSignificance DateLastEvaluated="1996-04-01">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2016-10-13">
<ReviewStatus>criteria provided, multiple submitters, no conflicts</ReviewStatus>
<Description>Pathogenic/Likely pathogenic</Description>
</ClinicalSignificance>

<ClinicalSignificance DateLastEvaluated="2012-06-07">
<ReviewStatus>no assertion criteria provided</ReviewStatus>
<Description>Conflicting interpretations of pathogenicity</Description>
<Explanation DataSource="ClinVar" Type="public">Pathogenic(1);Uncertain significance(1)</Explanation>
</ClinicalSignificance>

Given the evidence, we converted the significance field into an array of strings which may be parsed out of the Descriptions or Explanation fields.

Varying Delimiters

The delimiters in each field may vary. Currently, the delimiters for Description are , and /. The delimiters for Explanation are ; and /.

VCV File

Example

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<ClinVarVariationRelease xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://ftp.ncbi.nlm.nih.gov/pub/clinvar/xsd_public/clinvar_variation/variation_archive_1.4.xsd" ReleaseDate="2019-12-31">
<VariationArchive VariationID="431749" VariationName="GRCh37/hg19 1p36.31(chr1:6051187-6158763)" VariationType="copy number gain" DateCreated="2017-08-12" DateLastUpdated="2019-09-10" Accession="VCV000431749" Version="1" RecordType="included" NumberOfSubmissions="0" NumberOfSubmitters="0">
<RecordStatus>current</RecordStatus>
<Species>Homo sapiens</Species>
<IncludedRecord>
<SimpleAllele AlleleID="425239" VariationID="431749">
<GeneList>
<Gene Symbol="KCNAB2" FullName="potassium voltage-gated channel subfamily A regulatory beta subunit 2" GeneID="8514" HGNC_ID="HGNC:6229" Source="calculated" RelationshipType="genes overlapped by variant">
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="1" Accession="NC_000001.11" start="5992639" stop="6101186" display_start="5992639" display_stop="6101186" Strand="+"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="6052357" stop="6161252" display_start="6052357" display_stop="6161252" Strand="+"/>
</Location>
<OMIM>601142</OMIM>
</Gene>
<Gene Symbol="NPHP4" FullName="nephrocystin 4" GeneID="261734" HGNC_ID="HGNC:19104" Source="calculated" RelationshipType="genes overlapped by variant">
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh38" AssemblyAccessionVersion="GCF_000001405.38" AssemblyStatus="current" Chr="1" Accession="NC_000001.11" start="5862810" stop="5992425" display_start="5862810" display_stop="5992425" Strand="-"/>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="5922869" stop="6052532" display_start="5922869" display_stop="6052532" Strand="-"/>
</Location>
<OMIM>607215</OMIM>
</Gene>
</GeneList>
<Name>GRCh37/hg19 1p36.31(chr1:6051187-6158763)</Name>
<VariantType>copy number gain</VariantType>
<Location>
<CytogeneticLocation>1p36.31</CytogeneticLocation>
<SequenceLocation Assembly="GRCh37" AssemblyAccessionVersion="GCF_000001405.25" forDisplay="true" AssemblyStatus="previous" Chr="1" Accession="NC_000001.10" start="6051187" stop="6158763" display_start="6051187" display_stop="6158763"/> </Location>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
<XRefList>
<XRef Type="Interpreted" ID="431733" DB="ClinVar"/>
</XRefList>
</SimpleAllele>
<ReviewStatus>no interpretation for the single variant</ReviewStatus>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
<SubmittedInterpretationList>
<SCV Title="SUB1895145" Accession="SCV000296057" Version="1"/>
</SubmittedInterpretationList>
<InterpretedVariationList>
<InterpretedVariation VariationID="431733" Accession="VCV000431733" Version="1"/>
</InterpretedVariationList>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

Parsing

In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output.

id

<VariationArchive VariationID="431749" VariationName="GRCh37/hg19 1p36.31(chr1:6051187-6158763)" VariationType="copy number gain" DateCreated="2017-08-12" DateLastUpdated="2019-09-10" Accession="VCV000431749" Version="1" RecordType="included" NumberOfSubmissions="0" NumberOfSubmitters="0">

The Accession and Version fields are merged to form the ID (VCV000431749.1)

significance

<ClinVarVariationRelease>
<VariationArchive>
<IncludedRecord>
<SimpleAllele>
<Interpretations>
<Interpretation NumberOfSubmissions="0" NumberOfSubmitters="0" Type="Clinical significance">
<Description>no interpretation for the single variant</Description>
</Interpretation>
</Interpretations>
</SimpleAllele>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

May have multiple significances listed.

reviewStatus

<ClinVarVariationRelease>
<VariationArchive>
<IncludedRecord>
<ReviewStatus>no interpretation for the single variant</ReviewStatus>
</IncludedRecord>
</VariationArchive>
</ClinVarVariationRelease>

Known Issues

Known Issues
  • The XML file contains ~1k more entries (out of 162K) than the VCF file
  • The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF
  • The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H", etc.) as their alternate allele

Download URLs

ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz

https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz

JSON Output

small variants:

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]

large variants:

"clinvar":[
{
"chromosome":"1",
"begin":629025,
"end":8537745,
"variantType":"copy_number_loss",
"id":"RCV000051993.4",
"variationId":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"alleleOrigins":[
"not provided"
],
"phenotypes":[
"See cases"
],
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21",
"pubMedIds":[
"21844811"
]
},
{
"id":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21"
},
......
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
variantTypestringvariant type
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity

Building the supplementary files

The ClinVar .nsa and .nsi for Nirvana can be built using the SAUtils command's clinvar subcommand.

Source data files

Two input .xml files and a .version file are required in order to build the .nsa and .nsi files. You should have the following files:

ClinVarFullRelease_00-latest.xml.gz     ClinVarVariationRelease_00-latest.xml.gz
ClinVarFullRelease_00-latest.xml.gz.version

The version file is a text file with the following format.

NAME=ClinVar
VERSION=20220505
DATE=2022-05-05
DESCRIPTION=A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence

The help menu for the utility is as follows:

dotnet SAUtils.dll clinvar
---------------------------------------------------------------------------
SAUtils (c) 2022 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.18.1
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll clinvar [options]
Creates a supplementary database with ClinVar annotations

OPTIONS:
--ref, -r <VALUE> compressed reference sequence file
--rcv, -i <VALUE> ClinVar Full release XML file
--vcv, -c <VALUE> ClinVar Variation release XML file
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

dotnet SAUtils.dll clinvar

Here is a sample execution:

dotnet ~/development/Nirvana/bin/Debug/net6.0/SAUtils.dll clinvar \\
--ref ~/development/References/7/Homo_sapiens.GRCh38.Nirvana.dat --rcv ClinVarFullRelease_00-latest.xml.gz \\
--vcv ClinVarVariationRelease_00-latest.xml.gz --out ~/development/SupplementaryDatabase/63/GRCh38
---------------------------------------------------------------------------
SAUtils (c) 2022 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.18.1
---------------------------------------------------------------------------

Found 1535677 VCV records
Unknown vcv id:225946 found in RCV000211201.2
Unknown vcv id:225946 found in RCV000211253.2
Unknown vcv id:225946 found in RCV000211375.2
Unknown vcv id:976117 found in RCV001253316.1
Unknown vcv id:1321016 found in RCV001776995.2
3 unknown VCVs found in RCVs.
225946,976117,1321016
0 unknown VCVs found in RCVs.
Chromosome 1 completed in 00:00:15.1
Chromosome 2 completed in 00:00:20.0
Chromosome 3 completed in 00:00:09.7
Chromosome 4 completed in 00:00:05.9
Chromosome 5 completed in 00:00:09.8
Chromosome 6 completed in 00:00:08.3
Chromosome 7 completed in 00:00:08.7
Chromosome 8 completed in 00:00:06.2
Chromosome 9 completed in 00:00:08.6
Chromosome 10 completed in 00:00:07.0
Chromosome 11 completed in 00:00:11.7
Chromosome 12 completed in 00:00:08.0
Chromosome 13 completed in 00:00:06.3
Chromosome 14 completed in 00:00:06.0
Chromosome 15 completed in 00:00:06.6
Chromosome 16 completed in 00:00:10.8
Chromosome 17 completed in 00:00:13.8
Chromosome 18 completed in 00:00:02.9
Chromosome 19 completed in 00:00:08.7
Chromosome 20 completed in 00:00:03.6
Chromosome 21 completed in 00:00:02.4
Chromosome 22 completed in 00:00:03.6
Chromosome MT completed in 00:00:00.2
Chromosome X completed in 00:00:07.5
Chromosome Y completed in 00:00:00.0
Maximum bp shifted for any variant:2
Writing 37097 intervals to database...

Time: 00:13:26.9

- - - - \ No newline at end of file diff --git a/3.21/data-sources/cosmic-cancer-gene-census/index.html b/3.21/data-sources/cosmic-cancer-gene-census/index.html deleted file mode 100644 index 2a56b4ce..00000000 --- a/3.21/data-sources/cosmic-cancer-gene-census/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -cosmic-cancer-gene-census | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

cosmic-cancer-gene-census

   {
"name": "PRDM16",
"hgncId": 14000,
"ncbiGeneId": "63976",
"ensemblGeneId": "ENSG00000142611",
"cosmic": {
"roleInCancer": [
"oncogene",
"fusion"
]
}
}
FieldTypeNotes
roleInCancerstring arrayPossible roles in cancer
- - - - \ No newline at end of file diff --git a/3.21/data-sources/cosmic-gene-fusion-json/index.html b/3.21/data-sources/cosmic-gene-fusion-json/index.html deleted file mode 100644 index 8fd2b276..00000000 --- a/3.21/data-sources/cosmic-gene-fusion-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -cosmic-gene-fusion-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

cosmic-gene-fusion-json

   "cosmicGeneFusions":[
{
"id":"COSF881",
"numSamples":6,
"geneSymbols":[
"MYB",
"NFIB"
],
"hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",
"histologies":[
{
"name":"adenoid cystic carcinoma",
"numSamples":6
}
],
"sites":[
{
"name":"salivary gland (submandibular)",
"numSamples":1
},
{
"name":"salivary gland (parotid)",
"numSamples":1
},
{
"name":"salivary gland (nasal cavity)",
"numSamples":1
},
{
"name":"breast",
"numSamples":3
}
],
"pubMedIds":[
19841262
]
}
]
FieldTypeNotes
idstringCOSMIC fusion ID
numSamplesint
geneSymbolsstring array5' gene & 3' gene
hgvsrstringHGVS RNA translocation fusion notation
histologiescount arrayphenotypic descriptions
sitescount arraytissue types
pubMedIdsint arrayPubMed IDs

Count

FieldTypeNotes
namestringdescription
numSamplesint
- - - - \ No newline at end of file diff --git a/3.21/data-sources/cosmic-json/index.html b/3.21/data-sources/cosmic-json/index.html deleted file mode 100644 index 799dc47f..00000000 --- a/3.21/data-sources/cosmic-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -cosmic-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

cosmic-json

{
"id":"COSV58272668",
"numSamples":8,
"refAllele":"-",
"altAllele":"CCT",
"histologies":[
{
"name":"carcinoma (serous carcinoma)",
"numSamples":2
},
{
"name":"meningioma (fibroblastic)",
"numSamples":1
},
{
"name":"carcinoma",
"numSamples":1
},
{
"name":"carcinoma (squamous cell carcinoma)",
"numSamples":1
},
{
"name":"meningioma (transitional)",
"numSamples":1
},
{
"name":"carcinoma (adenocarcinoma)",
"numSamples":1
},
{
"name":"other (neoplasm)",
"numSamples":1
}
],
"sites":[
{
"name":"ovary",
"numSamples":2
},
{
"name":"meninges",
"numSamples":2
},
{
"name":"thyroid",
"numSamples":2
},
{
"name":"cervix",
"numSamples":1
},
{
"name":"large intestine (colon)",
"numSamples":1
}
],
"pubMedIds":[
25738363,
27548314
],
"confirmedSomatic":true,
"drugResistance":true, /* not in this particular COSMIC variant */
"isAlleleSpecific":true
}
FieldTypeNotes
idstringCOSMIC Genomic Mutation ID
numSamplesint
refAllelestring
altAllelestring
histologiescount arrayphenotypic descriptions
sitescount arraytissue types
pubMedIdsint arrayPubMed IDs
confirmedSomaticbooltrue when the variant is a confirmed somatic variant
drugResistancebooltrue when the variant has been associated with drug resistance

Count

FieldTypeNotes
namestringdescription
numSamplesint
- - - - \ No newline at end of file diff --git a/3.21/data-sources/cosmic/index.html b/3.21/data-sources/cosmic/index.html deleted file mode 100644 index f4c4c374..00000000 --- a/3.21/data-sources/cosmic/index.html +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - -COSMIC | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

COSMIC

Overview

COSMIC, the Catalogue of Somatic Mutations in Cancer, is the world's largest source of expert manually curated somatic mutation information relating to human -cancers.

Publication

John G Tate, Sally Bamford, Harry C Jubb, Zbyslaw Sondka, David M Beare, Nidhi Bindal, Harry Boutselakis, Charlotte G Cole, Celestino Creatore, Elisabeth Dawson, -Peter Fish, Bhavana Harsha, Charlie Hathaway, Steve C Jupe, Chai Yin Kok, Kate Noble, Laura Ponting, Christopher C Ramshaw, Claire E Rye, Helen E Speedy, Ray -Stefancsik, Sam L Thompson, Shicai Wang, Sari Ward, Peter J Campbell, Simon A Forbes. (2019) COSMIC: the Catalogue Of Somatic Mutations In -Cancer, Nucleic Acids Research, Volume 47, Issue D1

Licensed Content

Commercial companies are required to acquire a license from COSMIC. At the moment, this means that our COSMIC -content is only available in Illumina's products and services, not in the open source distribution.

Since many of you are academic users, we will enable a COSMIC login in our downloader later this year that will allow academic and commercial organizations (with -a license) to access our COSMIC data sources.

Small Variants

Our main COSMIC deliverable provides annotations for both coding and non-coding variants throughout the genome. As of COSMIC v96, this includes 28.7M variants -spanning the human genome. Nirvana currently parses four files to extract the relevant content:

  • CosmicCodingMuts.vcf.gz
  • CosmicNonCodingVariants.vcf.gz
  • CosmicMutantExport.tsv.gz
  • CosmicNCV.tsv.gz

VCF extraction

Example

#CHROM  POS ID  REF ALT QUAL  FILTER  INFO
1 65797 COSV58737189 T C . . GENE=OR4F5_ENST00000641515;STRAND=+;LEGACY_ID=COSN23957695;CDS=c.9+224T>C;AA=p.?;HGVSC=ENST00000641515.2:c.9+224T>C;HGVSG=1:g.65797T>C;CNT=1

Parsing

From the VCF files, we're mainly interested in the following columns:

  • CHROM
  • POS
  • ID
  • REF
  • ALT

TSV extraction

Example

Gene name Accession Number  Gene CDS length HGNC ID Sample name ID_sample ID_tumour Primary site  Site subtype 1  Site subtype 2  Site subtype 3  Primary histology Histology subtype 1 Histology subtype 2 Histology subtype 3 Genome-wide screen  GENOMIC_MUTATION_ID LEGACY_MUTATION_ID  MUTATION_ID Mutation CDS  Mutation AA Mutation Description  Mutation zygosity LOH GRCh  Mutation genome position  Mutation strand Resistance Mutation Mutation somatic status Pubmed_PMID ID_STUDY  Sample Type Tumour origin Age HGVSP HGVSC HGVSG
MCF2L_ENST00000375604 ENST00000375604.6 3372 14576 RK091_C01 1918867 1806188 liver NS NS NS carcinoma NS NS NS y COSV65049364 COSN1601909 113108365 c.73+3096A>G p.? Unknown het 38 13:113005079-113005079 + - Variant of unknown origin 322 fresh/frozen - NOS primary ENST00000375604.6:c.73+3096A>G 13:g.113005079A>G

Parsing

From the TSV file, we're mainly interested in the following columns:

  • GENOMIC_MUTATION_ID
  • ID_sample
  • Primary site
  • Site subtype 1
  • Primary histology
  • Histology subtype 1
  • Pubmed_PMID
  • Resistance Mutation
  • Mutation somatic status
info

For all the histologies and sites, we replace all underscores with spaces. For example, salivary_gland would become salivary gland.

Parsing

To aggregate the data in Nirvana, we perform the following:

  • Parse the coding and non-coding TSV files to retrieve the histologies, sites, PubMed IDs, somatic status, and resistance mutation status. Histologies and sites -are tracked with respect to sample IDs.
  • Parse the coding and non-coding VCF files to retrieve the genomic variant for each entry

Aggregating Histologies & Sites

For sites and histologies, we observe that the subtype provides additional description but is still dependent on the primary site value. For example, the primary -site might be skin, but the subtype is foot. Therefore, we will combine the values in the following manner: skin (foot).

COSMIC uses NS to show that a value is empty. If the subtype is NS, we will use the primary histology instead.

Download URL

GRCh37

GRCh38

JSON Output

{
"id":"COSV58272668",
"numSamples":8,
"refAllele":"-",
"altAllele":"CCT",
"histologies":[
{
"name":"carcinoma (serous carcinoma)",
"numSamples":2
},
{
"name":"meningioma (fibroblastic)",
"numSamples":1
},
{
"name":"carcinoma",
"numSamples":1
},
{
"name":"carcinoma (squamous cell carcinoma)",
"numSamples":1
},
{
"name":"meningioma (transitional)",
"numSamples":1
},
{
"name":"carcinoma (adenocarcinoma)",
"numSamples":1
},
{
"name":"other (neoplasm)",
"numSamples":1
}
],
"sites":[
{
"name":"ovary",
"numSamples":2
},
{
"name":"meninges",
"numSamples":2
},
{
"name":"thyroid",
"numSamples":2
},
{
"name":"cervix",
"numSamples":1
},
{
"name":"large intestine (colon)",
"numSamples":1
}
],
"pubMedIds":[
25738363,
27548314
],
"confirmedSomatic":true,
"drugResistance":true, /* not in this particular COSMIC variant */
"isAlleleSpecific":true
}
FieldTypeNotes
idstringCOSMIC Genomic Mutation ID
numSamplesint
refAllelestring
altAllelestring
histologiescount arrayphenotypic descriptions
sitescount arraytissue types
pubMedIdsint arrayPubMed IDs
confirmedSomaticbooltrue when the variant is a confirmed somatic variant
drugResistancebooltrue when the variant has been associated with drug resistance

Count

FieldTypeNotes
namestringdescription
numSamplesint

Gene Fusions

Gene fusions are manually curated from peer reviewed publications by expert COSMIC curators. A comprehensive literature curation is completed for each fusion -pair when it is released in the database. Currently COSMIC includes information on fusions involved in solid tumours and leukaemias.

TSV extraction

Example

SAMPLE_ID SAMPLE_NAME PRIMARY_SITE  SITE_SUBTYPE_1  SITE_SUBTYPE_2  SITE_SUBTYPE_3  PRIMARY_HISTOLOGY HISTOLOGY_SUBTYPE_1 HISTOLOGY_SUBTYPE_2 HISTOLOGY_SUBTYPE_3 FUSION_ID TRANSLOCATION_NAME  5'_CHROMOSOME 5'_STRAND 5'_GENE_ID  5'_GENE_NAME  5'_LAST_OBSERVED_EXON 5'_GENOME_START_FROM  5'_GENOME_START_TO  5'_GENOME_STOP_FROM 5'_GENOME_STOP_TO 3'_CHROMOSOME 3'_STRAND 3'_GENE_ID  3'_GENE_NAME  3'_FIRST_OBSERVED_EXON  3'_GENOME_START_FROM  3'_GENOME_START_TO  3'_GENOME_STOP_FROM 3'_GENOME_STOP_TO FUSION_TYPE PUBMED_PMID
749711 HCC1187 breast NS NS NS carcinoma ductal_carcinoma NS NS 665 ENST00000360863.10(RGS22):r.1_3555::ENST00000369518.1(SYCP1):r.2100_3452 8 - 197199 RGS22 22 99981937 99981937 100106116 100106116 1 + 212470 SYCP1_ENST00000369518 24 114944339 114944339 114995367 114995367 Inferred Breakpoint 20033038

Parsing

From the TSV file, we're mainly interested in the following columns:

  • SAMPLE_ID
  • PRIMARY_SITE
  • PRIMARY_HISTOLOGY
  • HISTOLOGY_SUBTYPE_1
  • FUSION_ID
  • TRANSLOCATION_NAME
  • PUBMED_PMID
info

For all the histologies and sites, we replace all underscores with spaces. For example, salivary_gland would become salivary gland.

Parsing

To create the gene fusion entries in Nirvana, we perform the following on each row in the TSV file:

  • Group all entries by FUSION_ID
  • Using all the entries related to this FUSION_ID:
    • Collect all the PubMed IDs
    • Tally the number of observed sample IDs
    • Grab the HGVS r. notation (should not change throughout the FUSION_ID)
    • Tally the number of samples observed for each histology
    • Tally the number of samples observed for each site
  • Extract the transcript IDs from the HGVS notation and lookup the associated gene symbols

Aggregating Histologies & Sites

Aggregating Histologies & Sites was previously described in the small variants section.

Known Issues

Known Issues

There are some issues with the HGVS RNA notation:

  • For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusions.

Download URL

GRCh37

GRCh38

JSON Output

   "cosmicGeneFusions":[
{
"id":"COSF881",
"numSamples":6,
"geneSymbols":[
"MYB",
"NFIB"
],
"hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",
"histologies":[
{
"name":"adenoid cystic carcinoma",
"numSamples":6
}
],
"sites":[
{
"name":"salivary gland (submandibular)",
"numSamples":1
},
{
"name":"salivary gland (parotid)",
"numSamples":1
},
{
"name":"salivary gland (nasal cavity)",
"numSamples":1
},
{
"name":"breast",
"numSamples":3
}
],
"pubMedIds":[
19841262
]
}
]
FieldTypeNotes
idstringCOSMIC fusion ID
numSamplesint
geneSymbolsstring array5' gene & 3' gene
hgvsrstringHGVS RNA translocation fusion notation
histologiescount arrayphenotypic descriptions
sitescount arraytissue types
pubMedIdsint arrayPubMed IDs

Count

FieldTypeNotes
namestringdescription
numSamplesint

Cancer Gene Census

TSV Extraction

Example

GENE_NAME       CELL_TYPE       PUBMED_PMID     HALLMARK        IMPACT  DESCRIPTION     CELL_LINE
PRDM16 18496560 role in cancer oncogene oncogene
PRDM16 16015645 role in cancer fusion fusion

Parsing

To extract information about TSGs and oncogenes, the data based on the "role in cancer" attribute is filtered. -For tumor suppressor genes, rows with the value "TSG" and for oncogenes, rows with the value "oncogene" are filtered. -Some genes have both "TSG/oncogene" as their role, which indicates that they can act as both.

Columns

Only the following columns are needed to gather the required roles in cancer:

  • GENE_NAME
  • IMPACT
  • HALLMARK
Possible Roles in Cancer

While parsing, only the following roles in cancer are found:

  • fusion
  • TSG
  • oncogene
Parsing Stats

The file contained the following number of instances for each role type:

Role in cancerTotal Instances
fusion149
TSG195
oncogene181
Total525

Known Issues

None

Download URL

JSON output

   {
"name": "PRDM16",
"hgncId": 14000,
"ncbiGeneId": "63976",
"ensemblGeneId": "ENSG00000142611",
"cosmic": {
"roleInCancer": [
"oncogene",
"fusion"
]
}
}
FieldTypeNotes
roleInCancerstring arrayPossible roles in cancer
- - - - \ No newline at end of file diff --git a/3.21/data-sources/dann-json/index.html b/3.21/data-sources/dann-json/index.html deleted file mode 100644 index 75af710a..00000000 --- a/3.21/data-sources/dann-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dann-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

dann-json

"dannScore": 0.27
FieldTypeNotes
dannScorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.21/data-sources/dann/index.html b/3.21/data-sources/dann/index.html deleted file mode 100644 index 57b21c57..00000000 --- a/3.21/data-sources/dann/index.html +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - -DANN | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

DANN

Overview

DANN uses the same feature set and training data as CADD (Combined Annotation-Dependent Depletion) to train a deep neural network (DNN). -CADD is an algorithm designed to annotate both coding and non-coding variants, and has been shown to outperform other annotation algorithms. -DANN improves on CADD (which uses Support Vector Machines (SVMs)) by capturing non-linear relationships by using a deep neural network instead of SVMs. -DANN achieves about a 19% relative reduction in the error rate and about a 14% relative increase in the area under the curve (AUC) metric over CADD’s SVM methodology.

Publication

Quang, Daniel, Yifei Chen, and Xiaohui Xie. DANN: a deep learning approach for annotating the pathogenicity of genetic variants. Bioinformatics 31.5 761-763 (2015). https://doi.org/10.1093/bioinformatics/btu703

TSV File

Example

chr     grch37_pos  ref     alt     DANN
1 10001 T A 0.16461391399220135
1 10001 T C 0.4396994049749739
1 10001 T G 0.38108629377072734
1 10002 A C 0.36182020272810128
1 10002 A G 0.44413258111779291
1 10002 A T 0.16812846819989813

Parsing

From the TSV file, we are interested in all columns:

  • chr
  • grch37_pos
  • ref
  • alt
  • DANN

GRCh38 liftover

The data is not available for GRCh38 on the DANN website. We performed a liftover from GRCh37 to GRCh38 using CrossMap.

Known Issues

None

Download URL

https://cbcl.ics.uci.edu/public_data/DANN/

JSON Output

"dannScore": 0.27
FieldTypeNotes
dannScorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.21/data-sources/dbsnp-json/index.html b/3.21/data-sources/dbsnp-json/index.html deleted file mode 100644 index 3bf5e3fc..00000000 --- a/3.21/data-sources/dbsnp-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbsnp-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

dbsnp-json

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.21/data-sources/dbsnp/index.html b/3.21/data-sources/dbsnp/index.html deleted file mode 100644 index 3b5cbc1f..00000000 --- a/3.21/data-sources/dbsnp/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -dbSNP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

dbSNP

Overview

dbSNP contains human single nucleotide variations, microsatellites, and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations.

Publication

Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP—Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. Genome Res., 9, 677–679.

VCF File

Example

#CHROM  POS ID  REF ALT QUAL    FILTER  INFO
1 10177 rs367896724 A AC . . RS=367896724;RSPOS=10177;dbSNPBuildID=138; \
SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \
VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \
TOPMED=0.76728147298674821,0.23271852701325178

Parsing

From the VCF file, we're mainly interested in the following:

  • rsID from the ID field
  • CAF from the INFO field

Global allele extraction

The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values).

Tie Breaking: Global Major Allele

If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele.

Tie Breaking: Global Minor Allele

If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily.

Equal Allele Frequency Example (2 alleles)

chr1    100 A   C   CAF=0.5,0.5

We will select A to be the global major allele and C to be the global minor allele.

Equal Allele Frequency Example (3 alleles)

chr1    100 A   C,T CAF=0.33,0.33,0.33

We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele.

Equal Allele Frequency in Alternate Alleles

chr1    100 A   C,T CAF=0.2,0.4,0.4

C and T have equal frequencies, so one is arbitrarily assigned as the global major allele and the other as the global minor allele.

Equal Allele Frequency Between Reference & Alternate Allele

chr1    100 A   C,T CAF=0.2,0.2,0.6

We will select T to be the global major allele and C to be the global minor allele.

Known Issues

Known Issues

If there are multiple entries with different CAF values for the same allele, we use the first CAF value.

Download URL

https://ftp.ncbi.nih.gov/snp/organisms/

JSON Output

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs
- - - - \ No newline at end of file diff --git a/3.21/data-sources/decipher-json/index.html b/3.21/data-sources/decipher-json/index.html deleted file mode 100644 index 64f552d2..00000000 --- a/3.21/data-sources/decipher-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -decipher-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

decipher-json

"decipher":[
{
"chromosome":"1",
"begin":13516,
"end":91073,
"numDeletions":27,
"deletionFrequency":0.675,
"numDuplications":27,
"duplicationFrequency":0.675,
"sampleSize":40,
"reciprocalOverlap": 0.27555,
"annotationOverlap": 0.5901
}
],
FieldTypeNotes
chromosomestringEnsembl-style chromosome names
beginint1-based position
endint1-based position
numDeletionsint# of observed deletions
deletionFrequencyfloatdeletion frequency
numDuplicationsint# of observed duplications
duplicationFrequencyfloatduplication frequency
sampleSizeinttotal # of samples
reciprocalOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap
annotationOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap
- - - - \ No newline at end of file diff --git a/3.21/data-sources/decipher/index.html b/3.21/data-sources/decipher/index.html deleted file mode 100644 index 14be495c..00000000 --- a/3.21/data-sources/decipher/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -DECIPHER | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

DECIPHER

Overview

DECIPHER (DatabasE of genomiC varIation and Phenotype in Humans using Ensembl Resources) is an interactive web-based database which incorporates a suite of tools designed to aid the interpretation of genomic variants.

DECIPHER enhances clinical diagnosis by retrieving information from a variety of bioinformatics resources relevant to the variant found in the patient. The patient's variant is displayed in the context of both normal variation and pathogenic variation reported at that locus thereby facilitating interpretation.

Publication

DECIPHER: Database of Chromosomal Imbalance and Phenotype in Humans using Ensembl Resources. Firth, H.V. et al., 2009. Am.J.Hum.Genet 84, 524-533 (DOI: dx.doi.org/10.1016/j.ajhg.2009.03.010)

TSV Extraction

#population_cnv_id  chr start   end deletion_observations   deletion_frequency  deletion_standard_error duplication_observations    duplication_frequency   duplication_standard_error  observations    frequency   standard_error  type    sample_size study
1 1 10529 177368 0 0 1 3 0.075 0.555277708 3 0.075 0.555277708 1 40 42M calls
2 1 13516 91073 0 0 1 27 0.675 0.109713431 27 0.675 0.109713431 1 40 42M calls
3 1 18888 35451 0 0 1 2 0.002366864 0.706269473 2 0.002366864 0.706269473 1 845 DDD

Parsing

We parse the DECIPHER tsv file and extract the following columns:

  • chr
  • start
  • end
  • deletion_observations
  • deletion_frequency
  • duplication_observations
  • duplication_frequency
  • sample_size

Download URL

https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz -https://www.deciphergenomics.org/files/downloads/population_cnv_grch37.txt.gz

JSON output

"decipher":[
{
"chromosome":"1",
"begin":13516,
"end":91073,
"numDeletions":27,
"deletionFrequency":0.675,
"numDuplications":27,
"duplicationFrequency":0.675,
"sampleSize":40,
"reciprocalOverlap": 0.27555,
"annotationOverlap": 0.5901
}
],
FieldTypeNotes
chromosomestringEnsembl-style chromosome names
beginint1-based position
endint1-based position
numDeletionsint# of observed deletions
deletionFrequencyfloatdeletion frequency
numDuplicationsint# of observed duplications
duplicationFrequencyfloatduplication frequency
sampleSizeinttotal # of samples
reciprocalOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap
annotationOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap
- - - - \ No newline at end of file diff --git a/3.21/data-sources/fusioncatcher-json/index.html b/3.21/data-sources/fusioncatcher-json/index.html deleted file mode 100644 index 61b6392c..00000000 --- a/3.21/data-sources/fusioncatcher-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -fusioncatcher-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

fusioncatcher-json

   "fusionCatcher":[
{
"genes":{
"first":{
"hgnc":"ETV6",
"isOncogene":true
},
"second":{
"hgnc":"RUNX1"
},
"isParalogPair":true,
"isPseudogenePair":true,
"isReadthrough":true
},
"germlineSources":[
"1000 Genomes Project"
],
"somaticSources":[
"COSMIC",
"TCGA oesophageal carcinomas"
]
}
]
FieldTypeNotes
genesgenes object5' gene & 3' gene
germlineSourcesstring arraymatches in known germline data sources
somaticSourcesstring arraymatches in known somatic data sources

genes

FieldTypeNotes
firstgene object5' gene
secondgene object3' gene
isParalogPairbooltrue when both genes are paralogs for each other
isPseudogenePairbooltrue when both genes are pseudogenes for each other
isReadthroughbooltrue when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)

gene

FieldTypeNotes
hgncstringgene symbol. e.g. MSH6
isOncogenebooltrue when this gene is an oncogene
- - - - \ No newline at end of file diff --git a/3.21/data-sources/fusioncatcher/index.html b/3.21/data-sources/fusioncatcher/index.html deleted file mode 100644 index 9ca8b73b..00000000 --- a/3.21/data-sources/fusioncatcher/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -FusionCatcher | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

FusionCatcher

Overview

FusionCatcher is a well-known tool that searches for somatic novel/known fusion genes, translocations, and/or chimeras in RNA-seq data. While FusionCatcher itself is not part of Nirvana, we have included a subset of their genomic databases in Nirvana.

Publication

Daniel Nicorici, Mihaela Şatalan, Henrik Edgren, Sara Kangaspeska, Astrid Murumägi, Olli Kallioniemi, Sami Virtanen, Olavi Kilkku. (2014) FusionCatcher – a tool for finding somatic fusion genes in paired-end RNA-sequencing data. bioRxiv 011650

Supported Data Sources

Oncogenes

The following data sources are aggregated and used to populate the isOncogene field in the gene JSON object:

DescriptionReferenceDataFusionCatcher filename
Bushmanbushmanlab.orgcancer_genes.txt
ONGENEJGGbioinfo-minzhao.orgoncogenes_more.txt
UniProt tumor genesNARuniprot.orgtumor_genes.txt

Germline

Nirvana labelReferenceDataFusionCatcher filename
1000 Genomes ProjectPLOS ONE1000genomes.txt
Healthy (strong support)banned.txt
Illumina Body Map 2.0EBIbodymap2.txt
CACGGenomicscacg.txt
ConjoinGPLOS ONEconjoing.txt
Healthy prefrontal cortexBMC Medical GenomicsNCBI GEOcortex.txt
Duplicated Genes DatabasePLOS ONEgenouest.orgdgd.txt
GTEx healthy tissuesgtexportal.orggtex.txt
Healthyhealthy.txt
Human Protein AtlasMCPEBIhpa.txt
Babiceanu non-cancer tissuesNARNARnon-cancer_tissues.txt
non-tumor cell linesnon-tumor_cells.txt
TumorFusions normalNARNARtcga-normal.txt

Somatic

Nirvana labelReferenceDataFusionCatcher filename
Alaei-Mahabadi 18 cancersPNAS18cancers.txt
DepMap CCLEdepmap.orgccle.txt
CCLE KlijnNature BiotechnologyNature Biotechnologyccle2.txt
CCLE VellichirammalMolecular Therapy Nucleic Acidsccle3.txt
Cancer Genome ProjectCOSMICcgp.txt
ChimerKB 4.0NARkobic.re.krchimerdb4kb.txt
ChimerPub 4.0NARkobic.re.krchimerdb4pub.txt
ChimerSeq 4.0NARkobic.re.krchimerdb4seq.txt
COSMICNARCOSMICcosmic.txt
Bao gliomasGenome Researchgliomas.txt
Knownknown.txt
Mitelman DBISB-CGCGoogle Cloudmitelman.txt
TCGA oesophageal carcinomasNatureoesophagus.txt
Bailey pancreatic cancersNatureNaturepancreases.txt
PCAWGCellICGCpcawg.txt
Robinson prostate cancersCellCellprostate_cancer.txt
TCGAcancer.govtcga.txt
TumorFusions tumorNARNARtcga-cancer.txt
TCGA GaoCellCelltcga2.txt
TCGA VellichirammalMolecular Therapy Nucleic Acidstcga3.txt
TICdbBMC Genomicsunav.eduticdb.txt

Gene Pair TSV File

Most of the data files in FusionCatcher are two-column TSV files containing the Ensembl gene IDs that are paired together.

Example

Here are the first few lines of the 1000genomes.txt file:

ENSG00000006210 ENSG00000102962
ENSG00000006652 ENSG00000181016
ENSG00000014138 ENSG00000149798
ENSG00000026297 ENSG00000071242
ENSG00000035499 ENSG00000155959
ENSG00000055211 ENSG00000131013
ENSG00000055332 ENSG00000179915
ENSG00000062485 ENSG00000257727
ENSG00000065978 ENSG00000166501
ENSG00000066044 ENSG00000104980

Parsing

In Nirvana, we will only import a gene pair if both Ensembl gene IDs are recognized from either our GRCh37 or GRCh38 cache files.

Gene TSV File

Some of the data files are single-column files containing Ensembl gene IDs. This is commonly used in the data files representing oncogene data sources.

Example

Here are the first few lines of the oncogenes_more.txt file:

ENSG00000000938
ENSG00000003402
ENSG00000005469
ENSG00000005884
ENSG00000006128
ENSG00000006453
ENSG00000006468
ENSG00000007350
ENSG00000008294
ENSG00000008952

Parsing

Known Issues

Known Issues

FusionCatcher also creates custom Ensembl genes (e.g. ENSG09000000002) to handle missing Ensembl genes. Nirvana will ignore these entries since we only include the gene IDs that are currently recognized by Nirvana.

I suspect that these were originally RefSeq genes and if so, we can support those directly in Nirvana in the future.

Download URL

https://sourceforge.net/projects/fusioncatcher/files/data

JSON Output

   "fusionCatcher":[
{
"genes":{
"first":{
"hgnc":"ETV6",
"isOncogene":true
},
"second":{
"hgnc":"RUNX1"
},
"isParalogPair":true,
"isPseudogenePair":true,
"isReadthrough":true
},
"germlineSources":[
"1000 Genomes Project"
],
"somaticSources":[
"COSMIC",
"TCGA oesophageal carcinomas"
]
}
]
FieldTypeNotes
genesgenes object5' gene & 3' gene
germlineSourcesstring arraymatches in known germline data sources
somaticSourcesstring arraymatches in known somatic data sources

genes

FieldTypeNotes
firstgene object5' gene
secondgene object3' gene
isParalogPairbooltrue when both genes are paralogs for each other
isPseudogenePairbooltrue when both genes are pseudogenes for each other
isReadthroughbooltrue when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)

gene

FieldTypeNotes
hgncstringgene symbol. e.g. MSH6
isOncogenebooltrue when this gene is an oncogene
- - - - \ No newline at end of file diff --git a/3.21/data-sources/gerp-json/index.html b/3.21/data-sources/gerp-json/index.html deleted file mode 100644 index 2b42e722..00000000 --- a/3.21/data-sources/gerp-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gerp-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

gerp-json

"gerpScore": 1.27
FieldTypeNotes
gerpScorefloatRange: -∞ to +∞
- - - - \ No newline at end of file diff --git a/3.21/data-sources/gerp/index.html b/3.21/data-sources/gerp/index.html deleted file mode 100644 index 956bd7d4..00000000 --- a/3.21/data-sources/gerp/index.html +++ /dev/null @@ -1,20 +0,0 @@ - - - - - - - -GERP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

GERP

Overview

GERP identifies constrained elements in multiple alignments by quantifying substitution deficits. -These deficits represent substitutions that would have occurred if the element were neutral DNA, but did not occur because the element has been under functional constraint (Rejected Substitutions). -Nirvana uses GERP++ which is based on a significantly faster and more statistically robust maximum likelihood estimation procedure to compute expected rates of evolution.

Publication

Davydov, Eugene V., et al. "Identifying a high fraction of the human genome to be under selective constraint using GERP++." PLoS computational biology 6.12 e1001025 (2010). https://doi.org/10.1371/journal.pcbi.1001025

Source Files

Example GRCh37

GRCh37 file is a TSV format

chr     position    GERP
1 12177 0.83
1 12178 -0.206
1 12179 -0.492
1 12180 -1.66
1 12181 0.83
1 12182 0.83
1 12183 -0.417
1 12184 0.83

Example GRCh38

GRCh38 file is a lift-over BED format

chr     pos_start   pos_end     GERP
1 12646 12647 0.298
1 12647 12648 2.63
1 12648 12649 1.87
1 12649 12650 0.252
1 12650 12651 -2.06
1 12651 12652 2.61
1 12652 12653 3.97

Parsing

From the TSV file, we are interested in the following columns:

  • chr
  • position
  • GERP

Known Issues

None

Download URL

GRCh37

http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html

GRCh38

The data is not available for GRCh38 on the GERP++ website, and was obtained from https://personal.broadinstitute.org/konradk/loftee_data/GRCh38/

JSON Output

"gerpScore": 1.27
FieldTypeNotes
gerpScorefloatRange: -∞ to +∞
- - - - \ No newline at end of file diff --git a/3.21/data-sources/gme-json/index.html b/3.21/data-sources/gme-json/index.html deleted file mode 100644 index 610f21b5..00000000 --- a/3.21/data-sources/gme-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gme-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

gme-json

"gmeVariome":{
"allAc":10,
"allAn":202,
"allAf":0.049504,
"failedFilter":true
}
FieldTypeNotes
allAcintGME allele count
allAnintGME allele number
allAffloatGME allele frequency
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.21/data-sources/gme/index.html b/3.21/data-sources/gme/index.html deleted file mode 100644 index 6fc11e18..00000000 --- a/3.21/data-sources/gme/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -GME Variome | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

GME Variome

Overview

The Greater Middle East (GME) Variome Project is aimed at generating a coding base reference for the countries found in the Greater Middle East. Nirvana presents variant frequencies for the Greater Middle Eastern population.

Publication

Scott, E. M., Halees, A., Itan, Y., Spencer, E. G., He, Y., Azab, M. A., Gabriel, S. B., Belkadi, A., Boisson, B., Abel, L., Clark, A. G., Greater Middle East Variome Consortium, Alkuraya, F. S., Casanova, J. L., & Gleeson, J. G. (2016). Characterization of Greater Middle Eastern genetic variation for enhanced disease gene discovery. Nature genetics, 48(9), 1071–1076. https://doi.org/10.1038/ng.3592

TSV Extraction

chrom   pos     ref     alt     AA      filter  FunctionGVS     geneFunction    Gene    GeneID  SIFT_pred       GERP++  AF      GME_GC  GME_AC  GME_AF  NWA     NEA     AP      Israel  SD      TP      CA      FunctionGVS_new Priority        Polyphen2_HVAR_pred     LRT_pred        MutationTaster_pred     rsid    OMIM_MIM        OMIM_Disease    AA_AC   EA_AC   rsid_link       position_link
1 69134 A G A VQSRTrancheSNP99.90to100.00 nonsynonymous_SNV exonic OR4F5 79501 T 2.31 96:0:5 10,192 0.04950495049504951 4:0:0 59:0:2 12:0:0 0:0:0 6:0:0 9:0:2 13:0:2 nonsynonymous_SNV MODERATE B N N none - - none none - http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69134-69133
1 69270 A G A PASS synonymous_SNV exonic OR4F5 79501 . . 93:38:240 518,224 0.6981132075471698 5:5:11 63:30:86 12:5:28 1:0:2 2:2:18 7:3:46 7:2:52 synonymous_SNV LOW . . . rs201219564 - - none none http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs201219564 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69270-69269
1 69428 T G T PASS nonsynonymous_SNV exonic OR4F5 79501 D 0.891 676:44:15 74,1396 0.050340136054421766 43:0:2 313:16:10 88:7:3 6:0:0 44:8:0 102:9:0 102:4:2 nonsynonymous_SNV MODERATE D N N rs140739101 - - 14,3808 313,6535 http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs140739101 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69428-69427

Parsing

We parse the GME tsv file and extract the following columns:

  • chrom
  • pos
  • ref
  • alt
  • filter
  • GME_AC
  • GME_AF

GRCh37 liftover

The data is not available for GRCh38 on the GME website. We performed a liftover from GRCh37 to GRCh38 using CrossMap.

Download URL

http://igm.ucsd.edu/gme/download.shtml

JSON output

"gmeVariome":{
"allAc":10,
"allAn":202,
"allAf":0.049504,
"failedFilter":true
}
FieldTypeNotes
allAcintGME allele count
allAnintGME allele number
allAffloatGME allele frequency
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.21/data-sources/gnomad-lof-json/index.html b/3.21/data-sources/gnomad-lof-json/index.html deleted file mode 100644 index 67e57bbf..00000000 --- a/3.21/data-sources/gnomad-lof-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-lof-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

gnomad-lof-json

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)
- - - - \ No newline at end of file diff --git a/3.21/data-sources/gnomad-small-variants-json/index.html b/3.21/data-sources/gnomad-small-variants-json/index.html deleted file mode 100644 index 27377532..00000000 --- a/3.21/data-sources/gnomad-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

gnomad-small-variants-json

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.
- - - - \ No newline at end of file diff --git a/3.21/data-sources/gnomad-structural-variants-data_description/index.html b/3.21/data-sources/gnomad-structural-variants-data_description/index.html deleted file mode 100644 index 3248aa5a..00000000 --- a/3.21/data-sources/gnomad-structural-variants-data_description/index.html +++ /dev/null @@ -1,19 +0,0 @@ - - - - - - - -gnomad-structural-variants-data_description | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

gnomad-structural-variants-data_description

Bed Example

The bed file was obtained from the original source for GRCh37

#chrom  start   end name    svtype  ALGORITHMS  BOTHSIDES_SUPPORT   CHR2    CPX_INTERVALS   CPX_TYPE    END2    ENDEVIDENCE HIGH_SR_BACKGROUND  PCRPLUS_DEPLETED    PESR_GT_OVERDISPERSION  POS2    PROTEIN_CODING__COPY_GAIN   PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC  PROTEIN_CODING__INTRONIC    PROTEIN_CODING__INV_SPAN    PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR    PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER    PROTEIN_CODING__UTR SOURCE  STRANDS SVLEN   SVTYPE  UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN  AC  AF  N_BI_GENOS  N_HOMREF    N_HET   N_HOMALT    FREQ_HOMREF FREQ_HET    FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF   MALE_N_HET  MALE_N_HOMALT   MALE_FREQ_HOMREF    MALE_FREQ_HET   MALE_FREQ_HOMALT    MALE_N_HEMIREF  MALE_N_HEMIALT  MALE_FREQ_HEMIREF   MALE_FREQ_HEMIALT   PAR FEMALE_AN   FEMALE_AC   FEMALE_AF   FEMALE_N_BI_GENOS   FEMALE_N_HOMREF FEMALE_N_HET    FEMALE_N_HOMALT FEMALE_FREQ_HOMREF  FEMALE_FREQ_HET FEMALE_FREQ_HOMALT  POPMAX_AF   AFR_AN  AFR_AC  AFR_AF  AFR_N_BI_GENOS  AFR_N_HOMREF    AFR_N_HET   AFR_N_HOMALT    AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT  AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF   AFR_MALE_N_HET  AFR_MALE_N_HOMALT   AFR_MALE_FREQ_HOMREF    AFR_MALE_FREQ_HET   AFR_MALE_FREQ_HOMALT    AFR_MALE_N_HEMIREF  AFR_MALE_N_HEMIALT  AFR_MALE_FREQ_HEMIREF   AFR_MALE_FREQ_HEMIALT   AFR_FEMALE_AN   AFR_FEMALE_AC   AFR_FEMALE_AF   AFR_FEMALE_N_BI_GENOS   AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET    AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF  AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT  AMR_AN  AMR_AC  AMR_AF  AMR_N_BI_GENOS  AMR_N_HOMREF    AMR_N_HET   AMR_N_HOMALT    AMR_FREQ_HOMREF AMR_FREQ_HET    AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS AMR_MALE_N_HOMREF   AMR_MALE_N_HET  AMR_MALE_N_HOMALT   AMR_MALE_FREQ_HOMREF    AMR_MALE_FREQ_HET   AMR_MALE_FREQ_HOMALT    AMR_MALE_N_HEMIREF  
AMR_MALE_N_HEMIALT  AMR_MALE_FREQ_HEMIREF   AMR_MALE_FREQ_HEMIALT   AMR_FEMALE_AN   AMR_FEMALE_AC   AMR_FEMALE_AF   AMR_FEMALE_N_BI_GENOS   AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET    AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF  AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT  EAS_AN  EAS_AC  EAS_AF  EAS_N_BI_GENOS  EAS_N_HOMREF    EAS_N_HET   EAS_N_HOMALT    EAS_FREQ_HOMREF EAS_FREQ_HET    EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF   EAS_MALE_N_HET  EAS_MALE_N_HOMALT   EAS_MALE_FREQ_HOMREF    EAS_MALE_FREQ_HET   EAS_MALE_FREQ_HOMALT    EAS_MALE_N_HEMIREF  EAS_MALE_N_HEMIALT  EAS_MALE_FREQ_HEMIREF   EAS_MALE_FREQ_HEMIALT   EAS_FEMALE_AN   EAS_FEMALE_AC   EAS_FEMALE_AF   EAS_FEMALE_N_BI_GENOS   EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET    EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF  EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT  EUR_AN  EUR_AC  EUR_AF  EUR_N_BI_GENOS  EUR_N_HOMREF    EUR_N_HET   EUR_N_HOMALT    EUR_FREQ_HOMREF EUR_FREQ_HET    EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF   EUR_MALE_N_HET  EUR_MALE_N_HOMALT   EUR_MALE_FREQ_HOMREF    EUR_MALE_FREQ_HET   EUR_MALE_FREQ_HOMALT    EUR_MALE_N_HEMIREF  EUR_MALE_N_HEMIALT  EUR_MALE_FREQ_HEMIREF   EUR_MALE_FREQ_HEMIALT   EUR_FEMALE_AN   EUR_FEMALE_AC   EUR_FEMALE_AF   EUR_FEMALE_N_BI_GENOS   EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET    EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF  EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT  OTH_AN  OTH_AC  OTH_AF  OTH_N_BI_GENOS  OTH_N_HOMREF    OTH_N_HET   OTH_N_HOMALT    OTH_FREQ_HOMREF OTH_FREQ_HET    OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF   OTH_MALE_N_HET  OTH_MALE_N_HOMALT   OTH_MALE_FREQ_HOMREF    OTH_MALE_FREQ_HET   OTH_MALE_FREQ_HOMALT    OTH_MALE_N_HEMIREF  OTH_MALE_N_HEMIALT  OTH_MALE_FREQ_HEMIREF   OTH_MALE_FREQ_HEMIALT   OTH_FEMALE_AN   OTH_FEMALE_AC   OTH_FEMALE_AF   OTH_FEMALE_N_BI_GENOS   OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET    OTH_FEMALE_N_HOMALT 
OTH_FEMALE_FREQ_HOMREF  OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT  FILTER
1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 0.017857100814580917 0.0UNRESOLVED

TSV Example

The tsv was obtained from the lifted-over dataset created by dbVar for GRCh38

#variant_call_accession variant_call_id variant_call_type   experiment_id   sample_id   sampleset_id    assembly    chrcontig   outer_start start   inner_start inner_stop  stop    outer_stop  insertion_length    variant_region_acc  variant_region_id   copy_number description validation  zygosity    origin  phenotype   hgvs_name   placement_method    placement_rank  placements_per_assembly remap_alignment remap_best_within_cluster   remap_coverage  remap_diff_chr  remap_failure_code  allele_count    allele_frequency    allele_number
nssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0

Structural Variant Type Mapping

The source files represented the structural variants with keys using various naming conventions. -In the Nirvana JSON output, these keys will be mapped according to the following.

Nirvana JSON SV Type KeyGRCh37 Source SV Type KeyGRCh38 Source SV Type Key
copy_number_variationcopy number variation
deletionDEL, CN=0deletion
duplicationDUPduplication
insertionINSinsertion
inversionINVinversion
mobile_element_insertionINS:MEmobile element insertion
mobile_element_insertionINS:ME:ALUalu insertion
mobile_element_insertionINS:ME:LINE1line1 insertion
mobile_element_insertionINS:ME:SVAsva insertion
structural alterationsequence alteration
complex_structural_alterationCPX
- - - - \ No newline at end of file diff --git a/3.21/data-sources/gnomad-structural-variants-json/index.html b/3.21/data-sources/gnomad-structural-variants-json/index.html deleted file mode 100644 index 73eff951..00000000 --- a/3.21/data-sources/gnomad-structural-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -gnomad-structural-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

gnomad-structural-variants-json

"gnomAD-preview": [
{
"chromosome": "1",
"begin": 40001,
"end": 47200,
"variantId": "gnomAD-SV_v2.1_DUP_1_1",
"variantType": "duplication",
"failedFilter": true,
"allAf": 0.068963,
"afrAf": 0.135694,
"amrAf": 0.022876,
"easAf": 0.01101,
"eurAf": 0.007846,
"othAf": 0.017544,
"femaleAf": 0.065288,
"maleAf": 0.07255,
"allAc": 943,
"afrAc": 866,
"amrAc": 21,
"easAc": 17,
"eurAc": 37,
"othAc": 2,
"femaleAc": 442,
"maleAc": 499,
"allAn": 13674,
"afrAn": 6382,
"amrAn": 918,
"easAn": 1544,
"eurAn": 4716,
"othAn": 114,
"femaleAn": 6770,
"maleAn": 6878,
"allHc": 91,
"afrHc": 90,
"amrHc": 1,
"easHc": 0,
"eurHc": 0,
"othHc": 55,
"femaleHc": 44,
"maleHc": 47,
"reciprocalOverlap": 0.01839,
"annotationOverlap": 0.16667
}
]

FieldTypeNotes
chromosomestringchromosome number
beginintegerposition interval start
endintegerposition interval end
variantTypestringstructural variant type
variantIdstringgnomAD ID
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Admixed American super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
othAffloating pointallele frequency for all other populations. Range: 0 - 1.0
femaleAffloating pointallele frequency for female population. Range: 0 - 1.0
maleAffloating pointallele frequency for male population. Range: 0 - 1.0
allAcintegerallele count for all populations.
afrAcintegerallele count for the African super population.
amrAcintegerallele count for the Admixed American super population.
easAcintegerallele count for the East Asian super population.
eurAcintegerallele count for the European super population.
othAcintegerallele count for all other populations.
maleAcintegerallele count for male population.
femaleAcintegerallele count for female population.
allAnintegerallele number for all populations.
afrAnintegerallele number for the African super population.
amrAnintegerallele number for the Admixed American super population.
easAnintegerallele number for the East Asian super population.
eurAnintegerallele number for the European super population.
othAnintegerallele number for all other populations.
femaleAnintegerallele number for female population.
maleAnintegerallele number for male population.
allHcintegercount of homozygous individuals for all populations.
afrHcintegercount of homozygous individuals for the African / African American population.
amrHcintegercount of homozygous individuals for the Latino population.
easHcintegercount of homozygous individuals for the East Asian population.
eurHcintegercount of homozygous individuals for the European super population.
othHcintegercount of homozygous individuals for all other populations.
maleHcintegercount of homozygous individuals for male population.
femaleHcintegercount of homozygous individuals for female population.
failedFilterbooleanTrue if this variant failed any filters (Note: we do not list the failed filters)
reciprocalOverlapfloating pointReciprocal overlap. Range: 0 - 1.0
annotationOverlapfloating pointAnnotation overlap. Range: 0 - 1.0

Note: Following fields are not available in GRCh38 because the source file does not contain this information:

Field
femaleAf
maleAf
maleAc
femaleAc
femaleAn
maleAn
allHc
afrHc
amrHc
easHc
eurHc
othHc
maleHc
femaleHc
failedFilter
- - - - \ No newline at end of file diff --git a/3.21/data-sources/gnomad/index.html b/3.21/data-sources/gnomad/index.html deleted file mode 100644 index 8ed6138d..00000000 --- a/3.21/data-sources/gnomad/index.html +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - -gnomAD | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

gnomAD

Overview

The Genome Aggregation Database (gnomAD) is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community.

Publication

Koch, L., 2020. Exploring human genomic diversity with gnomAD. Nature Reviews Genetics, 21(8), pp.448-448.

Small Variants

VCF extraction

We currently extract the following info fields from gnomAD genome and exome VCF files:

##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate allele count for samples">
##INFO=<ID=AN,Number=A,Type=Integer,Description="Total number of alleles in samples">
##INFO=<ID=nhomalt,Number=A,Type=Integer,Description="Count of homozygous individuals in samples">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Depth of informative coverage for each sample; reads with MQ=255 or with bad mates are filtered">
##INFO=<ID=lcr,Number=0,Type=Flag,Description="Variant falls within a low complexity region">
##INFO=<ID=AC_afr,Number=A,Type=Integer,Description="Alternate allele count for samples of African-American ancestry">
##INFO=<ID=AN_afr,Number=A,Type=Integer,Description="Total number of alleles in samples of African-American ancestry">
##INFO=<ID=AF_afr,Number=A,Type=Float,Description="Alternate allele frequency in samples of African-American ancestry">
##INFO=<ID=nhomalt_afr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of African-American ancestry">
##INFO=<ID=AC_amr,Number=A,Type=Integer,Description="Alternate allele count for samples of Latino ancestry">
##INFO=<ID=AN_amr,Number=A,Type=Integer,Description="Total number of alleles in samples of Latino ancestry">
##INFO=<ID=nhomalt_amr,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Latino ancestry">
##INFO=<ID=AC_eas,Number=A,Type=Integer,Description="Alternate allele count for samples of East Asian ancestry">
##INFO=<ID=AN_eas,Number=A,Type=Integer,Description="Total number of alleles in samples of East Asian ancestry">
##INFO=<ID=nhomalt_eas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of East Asian ancestry">
##INFO=<ID=AC_female,Number=A,Type=Integer,Description="Alternate allele count for female samples">
##INFO=<ID=AN_female,Number=A,Type=Integer,Description="Total number of alleles in female samples">
##INFO=<ID=nhomalt_female,Number=A,Type=Integer,Description="Count of homozygous individuals in female samples">
##INFO=<ID=AC_nfe,Number=A,Type=Integer,Description="Alternate allele count for samples of non-Finnish European ancestry">
##INFO=<ID=AN_nfe,Number=A,Type=Integer,Description="Total number of alleles in samples of non-Finnish European ancestry">
##INFO=<ID=nhomalt_nfe,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of non-Finnish European ancestry">
##INFO=<ID=AC_fin,Number=A,Type=Integer,Description="Alternate allele count for samples of Finnish ancestry">
##INFO=<ID=AN_fin,Number=A,Type=Integer,Description="Total number of alleles in samples of Finnish ancestry">
##INFO=<ID=nhomalt_fin,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Finnish ancestry">
##INFO=<ID=AC_asj,Number=A,Type=Integer,Description="Alternate allele count for samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AN_asj,Number=A,Type=Integer,Description="Total number of alleles in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=nhomalt_asj,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of Ashkenazi Jewish ancestry">
##INFO=<ID=AC_oth,Number=A,Type=Integer,Description="Alternate allele count for samples of uncertain ancestry">
##INFO=<ID=AN_oth,Number=A,Type=Integer,Description="Total number of alleles in samples of uncertain ancestry">
##INFO=<ID=nhomalt_oth,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of uncertain ancestry">
##INFO=<ID=AC_male,Number=A,Type=Integer,Description="Alternate allele count for male samples">
##INFO=<ID=AN_male,Number=A,Type=Integer,Description="Total number of alleles in male samples">
##INFO=<ID=nhomalt_male,Number=A,Type=Integer,Description="Count of homozygous individuals in male samples">
##INFO=<ID=controls_AC,Number=A,Type=Integer,Description="Alternate allele count for samples in the controls subset">
##INFO=<ID=controls_AN,Number=A,Type=Integer,Description="Total number of alleles in samples in the controls subset">

We also extract the following extra fields from gnomAD exome VCF file:

##INFO=<ID=AC_sas,Number=A,Type=Integer,Description="Alternate allele count for samples of South Asian ancestry">
##INFO=<ID=AN_sas,Number=A,Type=Integer,Description="Total number of alleles in samples of South Asian ancestry">
##INFO=<ID=nhomalt_sas,Number=A,Type=Integer,Description="Count of homozygous individuals in samples of South Asian ancestry">

Computation

Using these, we compute the following:

  • Coverage
  • Allele count, Homozygous count, allele number and allele frequencies for:
  • Global population
  • African/African Americans
  • Admixed Americans
  • Ashkenazi Jews
  • East Asians
  • Finnish
  • Non-Finnish Europeans
  • South Asian
  • Others (population not assigned)
  • Male
  • Female
  • Controls
Note
  • Coverage = DP / AN. Frequencies are computed using AC/AN for each population.
  • Please note that currently there is no genome sequencing data for the South Asian (SAS) population available in gnomAD.
  • Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.

Merging genomes and exomes

When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets.

info
  • For GRCh37, Nirvana currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output.
  • For GRCh38, Nirvana currently uses gnomAD version 3.0 which doesn't contain the exomes data. Therefore, only genomes data are presented in the output.

Filters

The following strategy will be used when there's a conflict in filter status:

Genomes PASSGenomes Filtered
Exomes PASSPASSOnly use exome data
Exomes FilteredOnly use genome dataFiltered

VCF download instructions

https://gnomad.broadinstitute.org/downloads

JSON output

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.

Building the supplementary files

The gnomAD .nsa for Nirvana can be built using the SAUtils command's gnomad subcommand. We will describe building gnomAD version 3.1 here.

Source data files

Input VCF files (one per chromosome) and a .version file are required in a folder to build the .nsa file. For example, my directory contains:

chr10.vcf.bgz  chr22.vcf.bgz
chr11.vcf.bgz chr2.vcf.bgz
chr12.vcf.bgz chr3.vcf.bgz
chr13.vcf.bgz chr4.vcf.bgz
chr14.vcf.bgz chr5.vcf.bgz
chr15.vcf.bgz chr6.vcf.bgz
chr16.vcf.bgz chr7.vcf.bgz
chr17.vcf.bgz chr8.vcf.bgz
chr18.vcf.bgz chr9.vcf.bgz
chr19.vcf.bgz chrM.vcf.bgz
chr1.vcf.bgz chrX.vcf.bgz
chr20.vcf.bgz chrY.vcf.bgz
chr21.vcf.bgz gnomad.r3.1.version

The version file is a text file with the following content.

NAME=gnomAD
VERSION=3.1
DATE=2020-10-29
DESCRIPTION=Allele frequencies from Genome Aggregation Database (gnomAD)

The help menu for the utility is as follows:

SAUtils.dll gnomad
---------------------------------------------------------------------------
SAUtils (c) 2021 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll gnomad [options]
Reads provided supplementary data files and populates tsv files

OPTIONS:
--ref, -r <VALUE> compressed reference sequence file
--genome, -g <VALUE> input directory containing VCF (and .version)
files with genomic frequencies
--exome, -e <VALUE> input directory containing VCF (and .version)
files with exomic frequencies
--temp, -t <VALUE> output temp directory for intermediate (per chrom)
NSA files
--out, -o <VALUE> output directory for NSA file
--help, -h displays the help menu
--version, -v displays the version

Here is a sample execution:

dotnet ~/Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll Gnomad \\
--ref ~/References/7/Homo_sapiens.GRCh38.Nirvana.dat --genome genomes/ \\
--out ~/SupplementaryDatabase/63/GRCh38 --temp ~/ExternalDataSources/gnomAD/3.1/GRCh38/temp

LoF Gene Metrics

Tab delimited file example

gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_z mis_z lof_z oe_lof_upper_rank oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfe p_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof exac_exp_lof exac_oe_lof brain_expression chromosome start_position end_position
MED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643

JSON key to TSV column mapping

JSON keyTSV columnDescription
pLipLIprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullpNullprobability of being completely tolerant of loss of function variation (observed = expected)
pRecpRecprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZsyn_zcorrected synonymous Z score
misZmis_zcorrected missense Z score
loeufoe_lof_upperloss of function observed/expected upper bound fraction (LOEUF)

Gene symbol update

The input file provides Ensembl gene IDs for each entry. We observed that they were unique, while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene IDs are more stable, and the Nirvana transcript cache data contains Ensembl gene IDs, we use these IDs to extract the gene symbols from the transcript cache. For example, if ENSG0001 has gene symbol GENE1 in the input but the Nirvana cache says ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry.

Conflict resolution

gnomAD uses Ensembl GeneIDs as unique identifiers in the source file, but Nirvana uses HGNC gene symbols. Multiple Ensembl GeneIDs can map to the same HGNC symbol and may therefore result in conflicts.

MDGA2   ENST00000426342 306 4.0043e+02  7.6419e-01  2.1096e-05  4724    78  1.6525e+02  4.7202e-01  1923    125 1.3737e+02  9.0993e-01  7.1973e-06  1413    4   2.0926e-06  453 3.8316e+01  9.9922e-01  8.6490e-12  7.8128e-04  1.0440e-01  7.8600e-01  1.0560e+00  6.9500e-01  8.4000e-01  5.0000e-02  2.3900e-01      8.2988e-01  1.6769e+00  5.1372e+00  1529    0   0   7   2.8103e-05  4.0317e-06  124784  7   0   124791  2.8047e-05  9.8167e-05  0.0000e+00  2.8962e-05  0.0000e+00  0.0000e+00  0.0000e+00  3.5391e-05  1.6672e-04  3.2680e-05  0.0000e+00  2.8962e-05  0.0000e+00  0.0000e+00  0.0000e+00  3.5308e-05  1.6492e-04  3.2678e-05  protein_coding  ENSG00000139915 2   2181    13  protein_coding  835332  9.9322e-01  3   2.7833e+01  1.0779e-01  NA  14  47308826    48144157
MDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 47311134 48143999

In such cases, Nirvana chooses the entry with the smallest "LOEUF" value. The reason for choosing this value can be highlighted by the following table:

LOEUF decileHaplo-insufficientAutosomal DominantAutosomal RecessiveOlfactory Genes
0-10%104140360
10-20%47128721
20-30%17861120
30-40%8801734
40-50%7652068
50-60%4542076
60-70%04615418
70-80%24912049
80-90%0345896
90-100%02640174
Note

List of genes with conflicting entries

MDGA2:
{"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}
{"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}
CRYBG3:
{"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}
{"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}
CHTF8:
{"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}
{"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}
SEPT1:
{"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}
{"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}
ARL14EPL:
{"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}
{"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}
UGT2A1:
{"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}
{"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}
LTB4R2:
{"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}
{"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}
CDRT1:
{"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}
{"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}
MUC3A:
{"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}
{"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}
COG8:
{"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}
{"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}
AC006486.1:
{"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}
{"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}
AL645922.1:
{"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}
{"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}
NBPF20:
{"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}
{"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}
PRAMEF11:
{"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}
{"synZ":-3.33e0,"misZ":-2.59e0}
FAM231D:
{"synZ":-1.98e0,"misZ":-1.44e0}
{"synZ":1.07e0,"misZ":3.13e-1}

Conflict resolution

  • Pick the entry with the lowest LOEUF score
  • If the same, pick the lowest pLI
  • Otherwise pick the entry with the max absolute value of synZ + misZ

Download URL

https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz

JSON output

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)

Structural Variants

Publication

Collins, R.L., Brand, H., Karczewski, K.J. et al. 2020. A structural variation reference for medical and population genetics. Nature 581, pp.444–451. https://doi.org/10.1038/s41586-020-2287-8

Note: The gnomAD structural variant annotations are in a preview stage at the moment. Currently, the annotations do not include translocation breakends. Future updates will include a better way of annotating the structural variants.

Source Files

Bed Example

The bed file was obtained from original source for GRCh37

#chrom  start   end name    svtype  ALGORITHMS  BOTHSIDES_SUPPORT   CHR2    CPX_INTERVALS   CPX_TYPE    END2    ENDEVIDENCE HIGH_SR_BACKGROUND  PCRPLUS_DEPLETED    PESR_GT_OVERDISPERSION  POS2    PROTEIN_CODING__COPY_GAIN   PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC  PROTEIN_CODING__INTRONIC    PROTEIN_CODING__INV_SPAN    PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR    PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER    PROTEIN_CODING__UTR SOURCE  STRANDS SVLEN   SVTYPE  UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN  AC  AF  N_BI_GENOS  N_HOMREF    N_HET   N_HOMALT    FREQ_HOMREF FREQ_HET    FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF   MALE_N_HET  MALE_N_HOMALT   MALE_FREQ_HOMREF    MALE_FREQ_HET   MALE_FREQ_HOMALT    MALE_N_HEMIREF  MALE_N_HEMIALT  MALE_FREQ_HEMIREF   MALE_FREQ_HEMIALT   PAR FEMALE_AN   FEMALE_AC   FEMALE_AF   FEMALE_N_BI_GENOS   FEMALE_N_HOMREF FEMALE_N_HET    FEMALE_N_HOMALT FEMALE_FREQ_HOMREF  FEMALE_FREQ_HET FEMALE_FREQ_HOMALT  POPMAX_AF   AFR_AN  AFR_AC  AFR_AF  AFR_N_BI_GENOS  AFR_N_HOMREF    AFR_N_HET   AFR_N_HOMALT    AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT  AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF   AFR_MALE_N_HET  AFR_MALE_N_HOMALT   AFR_MALE_FREQ_HOMREF    AFR_MALE_FREQ_HET   AFR_MALE_FREQ_HOMALT    AFR_MALE_N_HEMIREF  AFR_MALE_N_HEMIALT  AFR_MALE_FREQ_HEMIREF   AFR_MALE_FREQ_HEMIALT   AFR_FEMALE_AN   AFR_FEMALE_AC   AFR_FEMALE_AF   AFR_FEMALE_N_BI_GENOS   AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET    AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF  AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT  AMR_AN  AMR_AC  AMR_AF  AMR_N_BI_GENOS  AMR_N_HOMREF    AMR_N_HET   AMR_N_HOMALT    AMR_FREQ_HOMREF AMR_FREQ_HET    AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS AMR_MALE_N_HOMREF   AMR_MALE_N_HET  AMR_MALE_N_HOMALT   AMR_MALE_FREQ_HOMREF    AMR_MALE_FREQ_HET   AMR_MALE_FREQ_HOMALT    AMR_MALE_N_HEMIREF  
AMR_MALE_N_HEMIALT  AMR_MALE_FREQ_HEMIREF   AMR_MALE_FREQ_HEMIALT   AMR_FEMALE_AN   AMR_FEMALE_AC   AMR_FEMALE_AF   AMR_FEMALE_N_BI_GENOS   AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET    AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF  AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT  EAS_AN  EAS_AC  EAS_AF  EAS_N_BI_GENOS  EAS_N_HOMREF    EAS_N_HET   EAS_N_HOMALT    EAS_FREQ_HOMREF EAS_FREQ_HET    EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF   EAS_MALE_N_HET  EAS_MALE_N_HOMALT   EAS_MALE_FREQ_HOMREF    EAS_MALE_FREQ_HET   EAS_MALE_FREQ_HOMALT    EAS_MALE_N_HEMIREF  EAS_MALE_N_HEMIALT  EAS_MALE_FREQ_HEMIREF   EAS_MALE_FREQ_HEMIALT   EAS_FEMALE_AN   EAS_FEMALE_AC   EAS_FEMALE_AF   EAS_FEMALE_N_BI_GENOS   EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET    EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF  EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT  EUR_AN  EUR_AC  EUR_AF  EUR_N_BI_GENOS  EUR_N_HOMREF    EUR_N_HET   EUR_N_HOMALT    EUR_FREQ_HOMREF EUR_FREQ_HET    EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF   EUR_MALE_N_HET  EUR_MALE_N_HOMALT   EUR_MALE_FREQ_HOMREF    EUR_MALE_FREQ_HET   EUR_MALE_FREQ_HOMALT    EUR_MALE_N_HEMIREF  EUR_MALE_N_HEMIALT  EUR_MALE_FREQ_HEMIREF   EUR_MALE_FREQ_HEMIALT   EUR_FEMALE_AN   EUR_FEMALE_AC   EUR_FEMALE_AF   EUR_FEMALE_N_BI_GENOS   EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET    EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF  EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT  OTH_AN  OTH_AC  OTH_AF  OTH_N_BI_GENOS  OTH_N_HOMREF    OTH_N_HET   OTH_N_HOMALT    OTH_FREQ_HOMREF OTH_FREQ_HET    OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF   OTH_MALE_N_HET  OTH_MALE_N_HOMALT   OTH_MALE_FREQ_HOMREF    OTH_MALE_FREQ_HET   OTH_MALE_FREQ_HOMALT    OTH_MALE_N_HEMIREF  OTH_MALE_N_HEMIALT  OTH_MALE_FREQ_HEMIREF   OTH_MALE_FREQ_HEMIALT   OTH_FEMALE_AN   OTH_FEMALE_AC   OTH_FEMALE_AF   OTH_FEMALE_N_BI_GENOS   OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET    OTH_FEMALE_N_HOMALT 
OTH_FEMALE_FREQ_HOMREF  OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT  FILTER
1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 0.017857100814580917 0.0UNRESOLVED

TSV Example

The tsv was obtained from lifted over dataset created by dbVar for GRCh38

#variant_call_accession variant_call_id variant_call_type   experiment_id   sample_id   sampleset_id    assembly    chrcontig   outer_start start   inner_start inner_stop  stop    outer_stop  insertion_length    variant_region_acc  variant_region_id   copy_number description validation  zygosity    origin  phenotype   hgvs_name   placement_method    placement_rank  placements_per_assembly remap_alignment remap_best_within_cluster   remap_coverage  remap_diff_chr  remap_failure_code  allele_count    allele_frequency    allele_number
nssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0

Structural Variant Type Mapping

The source files represent the structural variants with keys using various naming conventions. In the Nirvana JSON output, these keys will be mapped according to the following table.

Nirvana JSON SV Type KeyGRCh37 Source SV Type KeyGRCh38 Source SV Type Key
copy_number_variationcopy number variation
deletionDEL, CN=0deletion
duplicationDUPduplication
insertionINSinsertion
inversionINVinversion
mobile_element_insertionINS:MEmobile element insertion
mobile_element_insertionINS:ME:ALUalu insertion
mobile_element_insertionINS:ME:LINE1line1 insertion
mobile_element_insertionINS:ME:SVAsva insertion
structural alterationsequence alteration
complex_structural_alterationCPX

Download URLs

GRCh37

The GRCh37 file was downloaded from the original source. Following table gives some essential data metrics:

https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.bed.gz

GRCh38

Note: The data was unavailable from gnomAD 2.1 original source, however the lifted over structural variant dataset was created by dbVar and was obtained from them https://www.ncbi.nlm.nih.gov/sites/dbvarapp/studies/nstd166/.

Download URL

https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/tsv/nstd166.GRCh38.variant_call.tsv.gz

JSON output

"gnomAD-preview": [
{
"chromosome": "1",
"begin": 40001,
"end": 47200,
"variantId": "gnomAD-SV_v2.1_DUP_1_1",
"variantType": "duplication",
"failedFilter": true,
"allAf": 0.068963,
"afrAf": 0.135694,
"amrAf": 0.022876,
"easAf": 0.01101,
"eurAf": 0.007846,
"othAf": 0.017544,
"femaleAf": 0.065288,
"maleAf": 0.07255,
"allAc": 943,
"afrAc": 866,
"amrAc": 21,
"easAc": 17,
"eurAc": 37,
"othAc": 2,
"femaleAc": 442,
"maleAc": 499,
"allAn": 13674,
"afrAn": 6382,
"amrAn": 918,
"easAn": 1544,
"eurAn": 4716,
"othAn": 114,
"femaleAn": 6770,
"maleAn": 6878,
"allHc": 91,
"afrHc": 90,
"amrHc": 1,
"easHc": 0,
"eurHc": 0,
"othHc": 55,
"femaleHc": 44,
"maleHc": 47,
"reciprocalOverlap": 0.01839,
"annotationOverlap": 0.16667
}
]

FieldTypeNotes
chromosomestringchromosome number
beginintegerposition interval start
endintegerposition interval end
variantTypestringstructural variant type
variantIdstringgnomAD ID
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
othAffloating pointallele frequency for all other populations. Range: 0 - 1.0
femaleAffloating pointallele frequency for female population. Range: 0 - 1.0
maleAffloating pointallele frequency for male population. Range: 0 - 1.0
allAcintegerallele count for all populations.
afrAcintegerallele count for the African super population.
amrAcintegerallele count for the Ad Mixed American super population.
easAcintegerallele count for the East Asian super population.
eurAcintegerallele count for the European super population.
othAcintegerallele count for all other populations.
maleAcintegerallele count for male population.
femaleAcintegerallele count for female population.
allAnintegerallele number for all populations.
afrAnintegerallele number for the African super population.
amrAnintegerallele number for the Ad Mixed American super population.
easAnintegerallele number for the East Asian super population.
eurAnintegerallele number for the European super population.
othAnintegerallele number for all other populations.
femaleAnintegerallele number for female population.
maleAnintegerallele number for male population.
allHcintegercount of homozygous individuals for all populations.
afrHcintegercount of homozygous individuals for the African / African American population.
amrHcintegercount of homozygous individuals for the Latino population.
easHcintegercount of homozygous individuals for the East Asian population.
eurHcintegercount of homozygous individuals for the European super population.
othHcintegercount of homozygous individuals for all other populations.
maleHcintegercount of homozygous individuals for male population.
femaleHcintegercount of homozygous individuals for female population.
failedFilterbooleanTrue if this variant failed any filters (Note: we do not list the failed filters)
reciprocalOverlapfloating pointReciprocal overlap. Range: 0 - 1.0
annotationOverlapfloating pointAnnotation overlap. Range: 0 - 1.0

Note: Following fields are not available in GRCh38 because the source file does not contain this information:

Field
femaleAf
maleAf
maleAc
femaleAc
femaleAn
maleAn
allHc
afrHc
amrHc
easHc
eurHc
othHc
maleHc
femaleHc
failedFilter
- - - - \ No newline at end of file diff --git a/3.21/data-sources/mito-heteroplasmy/index.html b/3.21/data-sources/mito-heteroplasmy/index.html deleted file mode 100644 index e761f8d0..00000000 --- a/3.21/data-sources/mito-heteroplasmy/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Mitochondrial Heteroplasmy | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Mitochondrial Heteroplasmy

Overview

Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline.

JSON File

Example

{
"T:C":{
"ad":[
1,
1,
1,
1,
1,
1
],
"allele_type":"alt",
"vrf":[
0.002369668246445498,
0.0024937655860349127,
0.0016129032258064516,
0.0025188916876574307,
0.0022935779816513763,
0.002008032128514056
],
"vrf_stats":{
"kurtosis":38.889891511122556,
"max":0.0025188916876574307,
"mean":5.4052190471990743e-05,
"min":0.0,
"nobs":246,
"skewness":6.346664692283075,
"stdev":0.0003461416264750575,
"variance":1.1981402557879823e-07
}
}
}

Parsing

From the JSON file, we're mainly interested in the following keys:

  • variant (i.e. T:C)
  • ad
  • vrf
  • nobs (number of observations)
Adjusting for null observations

The nobs value indicates how many observations were made. Ideally this would have been represented in the ad and vrf arrays, but it's left as an exercise for the reader.

Binning VRF Data

The vrf (variant read frequency) array in the JSON object above is paired with the ad array (allele depths) shown above.

The data in the JSON object has an excessive number of significant digits. This means that as the number of samples increases, this array will grow. To make this more future-proof, Nirvana bins everything according to 0.1% increments.

With the binned data, we end up having 775 distinct vrf values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143.

Pre-processing the Data

The JSON file is converted into a small TSV file that is embedded in Nirvana. Here is an example of the TSV file:

#CHROM  POS REF ALT VRF_BINS    VRF_COUNTS
chrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736
chrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736

Algorithm

Nirvana will calculate mitochondrial heteroplasmy data for every sample in the VCF. Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile.

Percentiles

Nirvana uses the statistical definition of percentile (indicating the value below which a given percentage of observations in a group of observations falls). Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1).

Download URL

Unavailable

The original data set is only available internally at Illumina at the moment.

JSON Output

"samples":[
{
"genotype":"0/1",
"variantFrequencies":[
0.333,
0.5
],
"alleleDepths":[
10,
20,
30
],
"heteroplasmyPercentile":[
23.13,
12.65
]
}
]
FieldTypeNotes
heteroplasmyPercentilefloat arrayone percentile for each variant frequency (each alternate allele)
- - - - \ No newline at end of file diff --git a/3.21/data-sources/mitomap-small-variants-json/index.html b/3.21/data-sources/mitomap-small-variants-json/index.html deleted file mode 100644 index 947c0d08..00000000 --- a/3.21/data-sources/mitomap-small-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -mitomap-small-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

mitomap-small-variants-json

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele
- - - - \ No newline at end of file diff --git a/3.21/data-sources/mitomap-structural-variants-json/index.html b/3.21/data-sources/mitomap-structural-variants-json/index.html deleted file mode 100644 index 46b18b8e..00000000 --- a/3.21/data-sources/mitomap-structural-variants-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -mitomap-structural-variants-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

mitomap-structural-variants-json

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring array
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
- - - - \ No newline at end of file diff --git a/3.21/data-sources/mitomap/index.html b/3.21/data-sources/mitomap/index.html deleted file mode 100644 index 9b98b16e..00000000 --- a/3.21/data-sources/mitomap/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -MITOMAP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

MITOMAP

Overview

MITOMAP provides a compendium of polymorphisms and mutations in human mitochondrial DNA.

Publication

Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. Current Protocols in Bioinformatics 1(123):1.23.1-26 (2013). http://www.mitomap.org

Scraping HTML Pages

Example

MITOMAP is unique in that it doesn't offer the data in a downloadable format. As a result, the annotation content in Nirvana is scraped from the following MITOMAP pages:

  1. mtDNA Control Region Sequence Variants
  2. mtDNA Coding Region & RNA Sequence Variants
  3. Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations
  4. Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations
  5. Reported mtDNA Deletions
  6. mtDNA Simple Insertions

Parsing

Here's what the HTML code looks like:

["582","<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>","Mitochondrial myopathy","T582C","tRNA Phe","-","+","Reported","<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=582&alt=C&quart=2'><u>72.90%</u></a> <i class='fa fa-arrow-up' style='color:orange' aria-hidden='true'></i></span>","0","<a href='/cgi-bin/print_ref_list?refs=90165,91590&title=RNA+Mutation+T582C' target='_blank'>2</a>"],
["583","<a href='/MITOMAP/GenomeLoci#MTTF'>MT-TF</a>","MELAS / MM & EXIT","G583A","tRNA Phe","-","+","Cfrm","<span style='display:inline-block;white-space:nowrap;'><a href='/cgi-bin/mitotip?pos=583&alt=A&quart=0'><u>93.10%</u></a> <i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i><i class='fa fa-arrow-up' style='color:red' aria-hidden='true'></i></span>","0","<a href='/cgi-bin/print_ref_list?refs=2066,90532,91590&title=RNA+Mutation+G583A' target='_blank'>3</a>"],

We're mainly interested in the following columns (numbers indicate the HTML page above):

  • Position1,2,3,4
  • Disease3,4
  • Nucleotide Change1,2
  • Allele3,4
  • Homoplasmy3,4
  • Heteroplasmy3,4
  • Status3,4
  • MitoTIP3,4
  • GB Seqs FL(CR)1,2,3,4
  • Deletion Junction5
  • Insert (nt)6
  • Insert Point (nt)6
  • References/Curated References1,2,3,4
MitoTIP

The MitoTIP information is used to populate the clinicalSignificance and scorePercentile JSON keys. The "frequency alert" entries are skipped since they are not directly relevant to clinical significance.

Left alignment

Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions.

Variant Enumeration

Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are C-C(2-8) and A-AC or ACC. Alternate alleles containing IUPAC ambiguity codes are similarly enumerated.

Inversions

MITOMAP inversions are currently treated as MNVs.

Allele Parsing

The following MITOMAP allele parsing conventions are supported:

  • C123T
  • 16021_16022del
  • 8042del2
  • C9537insC
  • 3902_3908invACCTTGC
  • A-AC or ACC
  • C-C(2-8)
  • 8042delAT

PostgreSQL Dump File

Example

COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;
1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177
2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534

Parsing

From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:

  • id
  • nlmid
Why not use the PostgreSQL file for everything?

Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in.

Known Issues

Duplicated records

Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown.

  • For diseases and PubMed IDs, we take the union of the values in the duplicated records.
  • For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.
Skipped records

Records that represent an alternate notation of the original variant are skipped. Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped.

Download URLs

JSON Output

Small Variants

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele

Structural Variants

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring array
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
- - - - \ No newline at end of file diff --git a/3.21/data-sources/omim-json/index.html b/3.21/data-sources/omim-json/index.html deleted file mode 100644 index 2a63f960..00000000 --- a/3.21/data-sources/omim-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -omim-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

omim-json

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping
- - - - \ No newline at end of file diff --git a/3.21/data-sources/omim/index.html b/3.21/data-sources/omim/index.html deleted file mode 100644 index 6724bf1e..00000000 --- a/3.21/data-sources/omim/index.html +++ /dev/null @@ -1,23 +0,0 @@ - - - - - - - -OMIM | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

OMIM

Overview

OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily.

Publications

Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: 30445645.

Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM®), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. PMID: 25428349.

Parse OMIM data

Nirvana uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Nirvana. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to a Nirvana gene symbol are further processed. The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols.

mim2gene.txt

This mim2gene.txt (http://omim.org/static/omim/data/mim2gene.txt) file provides the mapping between MIM numbers and gene symbols. An example of this file is given below:

# MIM Number    MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq)   Entrez Gene ID (NCBI)   Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)
100050 predominantly phenotypes
100070 phenotype 100329167
100100 phenotype
100200 predominantly phenotypes
100300 phenotype
100500 moved/removed
100600 phenotype
100640 gene 216 ALDH1A1 ENSG00000165092
100650 gene/phenotype 217 ALDH2 ENSG00000111275
100660 gene 218 ALDH3A1 ENSG00000108602
100670 gene 219 ALDH1B1 ENSG00000137124
100675 predominantly phenotypes
100678 gene 39 ACAT2 ENSG00000120437

The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns is used to find the proper gene symbol supported by Nirvana, which may or may not be the same as the gene symbol listed here.

OMIM API

Nirvana retrieves the OMIM annotations from the OMIM API JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. A sample JSON response from the API is provided below.

{
"omim": {
"version": "1.0",
"entryList": [
{
"entry": {
"prefix": "*",
"mimNumber": 100640,
"status": "live",
"titles": {
"preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",
"alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\nACETALDEHYDE DEHYDROGENASE 1;;\nALDH, LIVER CYTOSOLIC;;\nRETINAL DEHYDROGENASE 1; RALDH1"
},
"textSectionList": [
{
"textSection": {
"textSectionName": "description",
"textSectionTitle": "Description",
"textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\n\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."
}
}
],
"geneMap": {
"sequenceID": 7709,
"chromosome": 9,
"chromosomeSymbol": "9",
"chromosomeSort": 225,
"chromosomeLocationStart": 72900670,
"chromosomeLocationEnd": 72953052,
"transcript": "ENST00000297785.7",
"cytoLocation": "9q21",
"computedCytoLocation": "9q21.13",
"mimNumber": 100640,
"geneSymbols": "ALDH1A1",
"geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",
"mappingMethod": "REa, A",
"confidence": "P",
"mouseGeneSymbol": "Aldh1a1",
"mouseMgiID": "MGI:1353450",
"geneInheritance": null
},
"externalLinks": {
"geneIDs": "216",
"hgncID": "402",
"ensemblIDs": "ENSG00000165092,ENST00000297785.8",
"approvedGeneSymbols": "ALDH1A1",
"ncbiReferenceSequences": "1519246465",
"proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",
"uniGenes": "Hs.76392",
"swissProtIDs": "P00352",
"decipherGene": false,
"umlsIDs": "C1412333",
"gtr": true,
"cmgGene": false,
"keggPathways": true,
"gwasCatalog": false,

}
}
},
{
"entry": {
"prefix": "*",
"mimNumber": 102560,
"status": "live",
"titles": {
"preferredTitle": "ACTIN, GAMMA-1; ACTG1",
"alternativeTitles": "ACTIN, GAMMA; ACTG;;\nCYTOSKELETAL GAMMA-ACTIN;;\nACTIN, CYTOPLASMIC, 2"
},
"textSectionList": [
{
"textSection": {
"textSectionName": "description",
"textSectionTitle": "Description",
"textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."
}
}
],
"geneMap": {
"sequenceID": 13666,
"chromosome": 17,
"chromosomeSymbol": "17",
"chromosomeSort": 947,
"chromosomeLocationStart": 81509970,
"chromosomeLocationEnd": 81512798,
"transcript": "ENST00000331925.7",
"cytoLocation": "17q25.3",
"computedCytoLocation": "17q25.3",
"mimNumber": 102560,
"geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",
"geneName": "Actin, gamma-1",
"mappingMethod": "REa, A, Fd",
"confidence": "C",
"mouseGeneSymbol": "Actg1",
"mouseMgiID": "MGI:87906",
"geneInheritance": null,
"phenotypeMapList": [
{
"phenotypeMap": {
"mimNumber": 102560,
"phenotype": "Baraitser-Winter syndrome 2",
"phenotypeMimNumber": 614583,
"phenotypicSeriesNumber": "PS243310",
"phenotypeMappingKey": 3,
"phenotypeInheritance": "Autosomal dominant"
}
},
{
"phenotypeMap": {
"mimNumber": 102560,
"phenotype": "Deafness, autosomal dominant 20/26",
"phenotypeMimNumber": 604717,
"phenotypicSeriesNumber": "PS124900",
"phenotypeMappingKey": 3,
"phenotypeInheritance": "Autosomal dominant"
}
}
]
}
}
}
]
}
}

Content from the OMIM API JSON response is reorganized as shown in the Nirvana JSON Output

Mappings between the Nirvana JSON output and OMIM JSON API are listed in the table below:

Nirvana JSON key chainOMIM API JSON key chain
omim:mimNumberomim:entryList:entry:mimNumber
omim:geneNameomim:entryList:entry:geneMap:geneName
omim:descriptionomim:entryList:entry:textSectionList:textSection:textSectionContent
omim:phenotypes:mimNumberomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber
omim:phenotypes:phenotypeomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype
omim:phenotypes:descriptionomim:entryList:entry:textSectionList:textSection:textSectionContent
omim:phenotypes:mappingomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (see mapping below)
omim:phenotypes:inheritancesomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance
omim:phenotypes:commentsomim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (see mapping below)

Mapping key to content

1 to disorder was positioned by mapping of the wild type gene
2 to disease phenotype itself was mapped
3 to molecular basis of the disorder is known
4 to disorder is a chromosome deletion or duplication syndrome

Phenotype character to comment

? to unconfirmed or possibly spurious mapping
[/] to nondiseases
{/} to contribute to susceptibility to multifactorial disorders or to susceptibility to infection

There are different types of links in the OMIM description section. For example, in the above JSON response, we have the description of MIM entry 100640:

The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\n\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).

As the descriptions will be shown as plain text, we remove the curly brackets surrounding links and try to keep the text readable with minimal modifications. Briefly:

  • Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.
  • Links referring to a literature reference will be processed to remove the internal index and curly brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".
  • All the other links will simply have their curly brackets removed. For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".
  • If the content within a pair of parentheses becomes empty after being processed, the parentheses need to be removed as well and its surrounding white spaces should be properly processed. For example, "ALDH2 ({100650})," will become "ALDH2,".

Here is a list of examples showing how the description section is supposed to be processed:

Original textProcessed text
({516030}, {516040}, and {516050})
(e.g., D1, {168461}; D2, {123833}; D3, {123834})(e.g., D1; D2; D3)
(desmocollins; see DSC2, {125645})(desmocollins; see DSC2)
(e.g., see {102700}, {300755})
(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})(ADH). See also liver mitochondrial ALDH2
(see, e.g., CACNA1A; {601011})(see, e.g., CACNA1A)
(e.g., GSTA1; {138359}), mu (e.g., {138350})(e.g., GSTA1), mu
(NFKB; see {164011})(NFKB)
(see ISGF3G, {147574})(see ISGF3G)
(DCK; {EC 2.7.1.74}; {125450})(DCK; EC 2.7.1.74)

JSON output

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping

Building the supplementary files

The first step in building the OMIM .nga files is to use the SAUtils command's subcommand downloadOMIM to download the necessary data. In order to download the data, the user must possess an API key obtained from OMIM. This key has to be set as the environment variable OmimApiKey.

export OmimApiKey=<users-omim-api-key>
dotnet NirvanaBuild/SAUtils.dll downloadOMIM
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll downloadomim [options]
Download the OMIM gene annotation data

OPTIONS:
--cache, -c <directory>
input cache directory
--ref, -r <filename> input reference filename
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

dotnet NirvanaBuild/SAUtils.dll downloadOMIM --ref References/7/Homo_sapiens.GRCh38.Nirvana.dat --uga Cache/ --out ExternalDataSources/OMIM/2021-06-14
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953
---------------------------------------------------------------------------

Gene Symbol Update Statistics
============================================
{
"NumGeneSymbolsUpToDate": 16788,
"NumGeneSymbolsUpdated": 95,
"NumGenesWhereBothIdsAreNull": 0,
"NumGeneSymbolsNotInCache": 106,
"NumResolvedGeneSymbolConflicts": 15,
"NumUnresolvedGeneSymbolConflicts": 0
}

Time: 00:04:08.9

Once the download has succeeded, the nga files can be produced using the SAUtils command's subcommand omim.

dotnet NirvanaBuild/SAUtils.dll omim
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll omim [options]
Creates a gene annotation database from OMIM data

OPTIONS:
--m2g, -m <VALUE> MimToGeneSymbol tsv file
--json, -j <VALUE> OMIM entry json file
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version


dotnet NirvanaBuild/SAUtils.dll omim --m2g ExternalDataSources/OMIM/2021-06-14/MimToGeneSymbol.tsv --json ExternalDataSources/OMIM/2021-06-14/MimEntries.json.gz --out SupplementaryDatabase/63/
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953
---------------------------------------------------------------------------


Time: 00:00:04.5
- - - - \ No newline at end of file diff --git a/3.21/data-sources/phylop-json/index.html b/3.21/data-sources/phylop-json/index.html deleted file mode 100644 index 14357628..00000000 --- a/3.21/data-sources/phylop-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -phylop-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

phylop-json

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"phylopScore":0.459
}
]
FieldTypeNotes
phylopScorefloatrange: -14.08 to 6.424
- - - - \ No newline at end of file diff --git a/3.21/data-sources/phylop/index.html b/3.21/data-sources/phylop/index.html deleted file mode 100644 index 5d38bb0e..00000000 --- a/3.21/data-sources/phylop/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -PhyloP | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

PhyloP

Overview

PhyloP (phylogenetic p-values) conservation scores are obtained from the PHAST package (http://compgen.bscb.cornell.edu/phast/) for multiple alignments of vertebrate genomes to the human genome. For GRCh38, the multiple alignments are against 19 mammals and for GRCh37, they are against 45 vertebrate genomes.

Publication

Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. Genome Res. 2005 Aug;15(8):1034-50. (http://www.genome.org/cgi/doi/10.1101/gr.3715005)

WigFix File

The data is provided in WigFix files, which are text files that provide conservation scores for contiguous intervals in the following format:

fixedStep chrom=chr1 start=10918 step=1
0.064
0.058
0.064
0.058
0.064
0.064
fixedStep chrom=chr1 start=34045 step=1
0.111
0.100
0.111
0.111
0.100
0.111
0.111
0.111
0.100
0.111
-1.636

We convert them to binary files with indexes for fast query. Note that these are scores for genomic positions and are reported only for SNVs.

Download URL

GRCh37: http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/

GRCh38: http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/

JSON Output

Unlike other supplementary data sources, phyloP scores are reported in the variants section.

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"phylopScore":0.459
}
]
FieldTypeNotes
phylopScorefloatrange: -14.08 to 6.424
- - - - \ No newline at end of file diff --git a/3.21/data-sources/primate-ai-json/index.html b/3.21/data-sources/primate-ai-json/index.html deleted file mode 100644 index adb0b27e..00000000 --- a/3.21/data-sources/primate-ai-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -primate-ai-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

primate-ai-json

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3,
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.21/data-sources/primate-ai/index.html b/3.21/data-sources/primate-ai/index.html deleted file mode 100644 index 50e0ac45..00000000 --- a/3.21/data-sources/primate-ai/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Primate AI | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Primate AI

Overview

Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations. The method is described in the publication:

Publication

Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. Nat Genet 50, 1161–1170 (2018). https://doi.org/10.1038/s41588-018-0167-z

TSV File

Example

chr pos ref alt refAA   altAA   strand_1pos_0neg    trinucleotide_context   UCSC_gene   ExAC_coverage   primateDL_score
chr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239
chr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546

Parsing

From the TSV file, we're mainly interested in the following columns:

  • chr
  • pos
  • ref
  • alt
  • primateDL_score

We also use UCSC_gene to filter out variants that don't have matching gene models in Nirvana.

Pre-processing

Converting UCSC IDs

Primate AI only provides UCSC IDs. As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs.

The following queries are used to download the conversions from UCSC:

mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \
-e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv

mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \
-e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \
hg19 > ucsc_ensembl.tsv

Running the Pre-Processor

The Primate AI pre-processor can be run as follows:

dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \
ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz

During conversion, 0.5% of the UCSC IDs cannot be converted to either Entrez or Ensembl gene IDs. Once the gene IDs have been acquired, we check to see which are available in Nirvana.

The following Entrez Gene IDs were not found:

399753
401980
504189
504191
100293534

Here is the output from the pre-processor:

- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.
- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.
- loading UGA gene ID to gene dictionary... 103,277 genes loaded.
- parsing Primate AI variants... 70,121,953 variants parsed.

# variants with unknown gene ID: 27,253 / 70,121,953
# genes with unknown gene ID: 109 / 19,614

# variants not in UGA: 2,036 / 70,121,953
# genes not in UGA: 6 / 19,614

Known Issues

Known Issues

The Primate AI data set provides raw scores, but the scores are biased according to gene context. I.e. a 0.4 means something different in TP53 than it does in KRAS.

As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. According to their research, the 25th percentile is a good proxy for benign variants and the 75th percentile is a good proxy for pathogenic variants.

Download URL

https://basespace.illumina.com/s/cPgCSmecvhb4

JSON Output

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3,
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.21/data-sources/revel-json/index.html b/3.21/data-sources/revel-json/index.html deleted file mode 100644 index 78bd65af..00000000 --- a/3.21/data-sources/revel-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -revel-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

revel-json

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.21/data-sources/revel/index.html b/3.21/data-sources/revel/index.html deleted file mode 100644 index 9ef7ec94..00000000 --- a/3.21/data-sources/revel/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -REVEL | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

REVEL

Overview

REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons.

Publication

Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. The American Journal of Human Genetics 99, 877-885 (2016). https://doi.org/10.1016/j.ajhg.2016.08.016

CSV File

Example

chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL
1,35142,35142,G,A,T,M,0.027
1,35142,35142,G,C,T,R,0.035
1,35142,35142,G,T,T,K,0.043
1,35143,35143,T,A,T,S,0.018
1,35143,35143,T,C,T,A,0.034

Parsing

From the CSV file, we're mainly interested in the following columns:

  • chr
  • hg19_pos
  • grch38_pos
  • ref
  • alt
  • REVEL

Known Issues

Sorting

Since the input file contains positions for both GRCh37 and GRCh38, we split it into two TSV files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file.

Conflicting Scores

When there are multiple scores available for the same variant (i.e. the same position with the same alternative allele), we pick the highest score.

Download URL

https://sites.google.com/site/revelgenomics/downloads

JSON Output

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0
- - - - \ No newline at end of file diff --git a/3.21/data-sources/splice-ai-json/index.html b/3.21/data-sources/splice-ai-json/index.html deleted file mode 100644 index 42576a66..00000000 --- a/3.21/data-sources/splice-ai-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -splice-ai-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

splice-ai-json

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place
- - - - \ No newline at end of file diff --git a/3.21/data-sources/splice-ai/index.html b/3.21/data-sources/splice-ai/index.html deleted file mode 100644 index edc983ad..00000000 --- a/3.21/data-sources/splice-ai/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Splice AI | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Splice AI

Overview

SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence.

Publication

K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. Cell, 176 (3) (2019), pp. 535-548 e24

VCF File

Example

##fileformat=VCFv4.0
##assembly=GRCh37/hg19
##INFO=<ID=SYMBOL,Number=1,Type=String,Description="HGNC gene symbol">
##INFO=<ID=STRAND,Number=1,Type=String,Description="+ or - depending on whether the gene lies in the positive or negative strand">
##INFO=<ID=TYPE,Number=1,Type=String,Description="E or I depending on whether the variant position is exonic or intronic (GENCODE V24lift37 canonical annotation)">
##INFO=<ID=DIST,Number=1,Type=Integer,Description="Distance between the variant position and the closest splice site (GENCODE V24lift37 canonical annotation)">
##INFO=<ID=DS_AG,Number=1,Type=Float,Description="Delta score (acceptor gain)">
##INFO=<ID=DS_AL,Number=1,Type=Float,Description="Delta score (acceptor loss)">
##INFO=<ID=DS_DG,Number=1,Type=Float,Description="Delta score (donor gain)">
##INFO=<ID=DS_DL,Number=1,Type=Float,Description="Delta score (donor loss)">
##INFO=<ID=DP_AG,Number=1,Type=Integer,Description="Delta position (acceptor gain) relative to the variant position">
##INFO=<ID=DP_AL,Number=1,Type=Integer,Description="Delta position (acceptor loss) relative to the variant position">
##INFO=<ID=DP_DG,Number=1,Type=Integer,Description="Delta position (donor gain) relative to the variant position">
##INFO=<ID=DP_DL,Number=1,Type=Integer,Description="Delta position (donor loss) relative to the variant position">
#CHROM POS ID REF ALT QUAL FILTER INFO
10 92946 . C T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35
10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1
10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21
10 92947 . A C . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34
10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34
10 92947 . A G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32

Parsing

From the VCF file, we're mainly interested in the following columns:

  • DS_AG - Δ score (acceptor gain)
  • DS_AL - Δ score (acceptor loss)
  • DS_DG - Δ score (donor gain)
  • DS_DL - Δ score (donor loss)
  • DP_AG - Δ position (acceptor gain) relative to the variant position
  • DP_AL - Δ position (acceptor loss) relative to the variant position
  • DP_DG - Δ position (donor gain) relative to the variant position
  • DP_DL - Δ position (donor loss) relative to the variant position

The Splice AI team suggests the following interpretation for the scores:

RangeConfidencePathogenicity
0 ≤ x < 0.1lowlikely benign
0.1 ≤ x ≤ 0.5mediumlikely pathogenic
x > 0.5highpathogenic

Pre-processing

Filtering

Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value (e.g. low splice scores observed in intergenic regions). Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed.

As a result, Nirvana filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism.

Download URL

https://basespace.illumina.com/s/5u6ThOblecrh

JSON Output

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place
- - - - \ No newline at end of file diff --git a/3.21/data-sources/topmed-json/index.html b/3.21/data-sources/topmed-json/index.html deleted file mode 100644 index 79f6f81e..00000000 --- a/3.21/data-sources/topmed-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -topmed-json | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

topmed-json

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.21/data-sources/topmed/index.html b/3.21/data-sources/topmed/index.html deleted file mode 100644 index 5a55e5ad..00000000 --- a/3.21/data-sources/topmed/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -TOPMed | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

TOPMed

Overview

The Trans-Omics for Precision Medicine (TOPMed) program, sponsored by the National Institutes of Health (NIH) National Heart, Lung and Blood Institute (NHLBI), is part of a broader Precision Medicine Initiative, which aims to provide disease treatments tailored to an individual’s unique genes and environment. TOPMed contributes to this Initiative through the integration of whole-genome sequencing (WGS) and other omics (e.g., metabolic profiles, epigenomics, protein and RNA expression patterns) data with molecular, behavioral, imaging, environmental, and clinical data.

Publication

Kowalski, M.H., Qian, H., Hou, Z., Rosen, J.D., Tapia, A.L., Shan, Y., Jain, D., Argos, M., Arnett, D.K., Avery, C. and Barnes, K.C., 2019. Use of >100,000 NHLBI Trans-Omics for Precision Medicine (TOPMed) Consortium whole genome sequences improves imputation quality and detection of rare variant associations in admixed African and Hispanic/Latino populations. PLoS genetics, 15(12), p.e1008500.

VCF extraction

We currently extract the following fields from the TOPMed VCF file:

##INFO=<ID=AN,Number=1,Type=Integer,Description="Number of Alleles in Samples with Coverage">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Alternate Allele Counts in Samples with Coverage">
##INFO=<ID=AF,Number=A,Type=Float,Description="Alternate Allele Frequencies">
##INFO=<ID=Het,Number=A,Type=Integer,Description="Number of samples with heterozygous genotype calls">
##INFO=<ID=Hom,Number=A,Type=Integer,Description="Number of samples with homozygous alternate genotype calls">

Example:

chr1    10132   TOPMed_freeze_5?chr1:10,132     T       C       255     SVM     VRT=1;NS=62784;AN=125568;AC=32;AF=0.000254842;Het=32;Hom=0      NA:FRQ  125568:0.000254842

GRCh37 liftover

The data is not available for GRCh37 on the TOPMed website. We performed a liftover from GRCh38 to GRCh37 using dbSNP ids.

Download URL

https://bravo.sph.umich.edu/freeze5/hg38/download

JSON output

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters
- - - - \ No newline at end of file diff --git a/3.21/file-formats/custom-annotations/index.html b/3.21/file-formats/custom-annotations/index.html deleted file mode 100644 index 8a08cf50..00000000 --- a/3.21/file-formats/custom-annotations/index.html +++ /dev/null @@ -1,40 +0,0 @@ - - - - - - - -Custom Annotations | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Custom Annotations

Overview

While the team tries to keep data sources up-to-date, you might want to start incorporating new annotations ahead of our update cycle. Another common use case involves protected health information (PHI). Custom annotations are a mechanism that enables both use cases.

Here are some examples of how our collaborators use custom annotations:

  • associating context from both a patient-level and a patient cohort level with the variant annotations
  • adding content that is licensed (e.g. HGMD) to the variant annotations

At the moment, we have two different custom annotation file formats. One provides additional annotations to variants (both small variants and SVs) -while the other caters to gene annotations.

In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data.

The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how -Nirvana should match the variants.

At Illumina, there are usually many components downstream of Nirvana that have to parse our annotations. If a customer provides a custom -annotation, those downstream tools need to understand more about the data such as:

  • data type (e.g. number, boolean, or a string)
  • data category (e.g. is this an allele count, allele number, allele frequency, etc.)
  • associated population (i.e. if this is an allele frequency)

For each custom annotation, Nirvana uses this context to create a JSON schema that can be sent to downstream tools. If -a tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of [0, 1].

Variant File Format

File Format

Nirvana expects plain text (or gzipped text) files. Using tools like Excel can add extra characters that can break parsing. We highly recommend creating and modifying these files with a plain text editor like Notepad, Notepad++ or Atom.

Basic Allele Frequency Example

Create the Custom Annotation TSV

Imagine that you want to create a basic allele frequency custom annotation for small variants. If we visualized the tab-delimited file -(TSV), it would look something like this:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTallAf
#categories...AlleleFrequency
#descriptions...ALL
#type...number
chr1623603511TGAT0.000006579
chr1668801894GA0.000006569
chr1911107436GA0.00003291

Here's the full TSV file.

Let's go over the header and discuss the contents:

  • title indicates the name of the JSON key
  • assembly indicates that this data is only valid for GRCh38.
  • matchVariantsBy indicates how annotations should be matched and reported. In this case annotations will be matched and reported by allele.
  • categories provides hints to downstream tools on how they might want to treat the data. In this case, we indicate that it's an allele frequency.
  • descriptions are used in special circumstances to provide more context. Even though column 5 is called allAf, it might not be clear to a -downstream tool that this means a global allele frequency using all sub-populations. In this case, ALL indicates the intended population.
  • type indicates to downstream tools the data type. Since allele frequencies are numbers, we'll write number in this column.
Reference Base Checking

Nirvana validates all the reference bases in a custom annotation. If a variant or genomic region is specified that has the wrong reference base, an error will be produced.

Sorting

The variants within each chromosome must be sorted by genomic position.

Convert to Nirvana Format

First we need to convert the TSV file to Nirvana's native file format and let's put that file in a new directory called CA:

$ mkdir CA
$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \
-r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA
---------------------------------------------------------------------------
SAUtils (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Chromosome 16 completed in 00:00:00.1
Chromosome 19 completed in 00:00:00.0

Time: 00:00:00.2

Annotate with Nirvana

Let's annotate the following VCF (notice that it's one of the variants that we have in our custom annotation):

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 68801894 . G A . . .

Here's the full VCF file.

Since Nirvana can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to -the normal Nirvana command-line.

$ dotnet bin/Release/netcoreapp2.1/Nirvana.dll -c Data/Cache/GRCh38/Both \
-r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \
--sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA
---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:01.8
SA Position Scan 00:00:00.0 19

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
chr16 00:00:00.2 00:00:01.3 1

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:01.9 25.5 %
Preload 00:00:00.2 3.3 %
Annotation 00:00:01.3 18.2 %

Time: 00:00:06.3

Investigate the Results

We would expect the following data to show up in our JSON output file:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": {
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06
},
"clinvar": [

Here's the full JSON file.

Nirvana preserves up to 6 decimal places for allele frequency data.

Categories & Descriptions Example

Create the Custom Annotation TSV

Building on the previous example, we can add other types of annotations like predictions and general notes.

Col 1Col 2Col 3Col 4Col 5Col 6Col 7
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTallAfpathogenicitynotes
#categories...AlleleFrequencyPrediction.
#descriptions...ALL..
#type...numberstringstring
chr1623603511TGAT0.000006579P.
chr1668801894GA0.000006569LPSeen in case 123
chr1911107436GA0.00003291..

Here's the full TSV file.

Placeholders

You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). While -Nirvana also accepts empty columns in the TSV file, we use them in these examples to promote readability.

Let's go over what's new in this example:

  • Column 6 adds a field called pathogenicity which uses the Prediction category. When using this category, Nirvana will validate to make sure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic).
  • Column 7 adds a field called notes and it doesn't have a category or description. We're just going to use it to add some internal -notes.

Annotate with Nirvana

Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the -alternate allele (allele-specific match):

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 23603511 . TG T . . .
16 68801894 . G A . . .
19 11107436 . G C . . .

Here's the full VCF file.

Investigate the Results

Because we specified #matchVariantsBy=allele in our custom annotation file, only the middle variant will get an annotation:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": {
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06,
"pathogenicity": "LP",
"notes": "Seen in case 123"
},
"clinvar": [

Here's the full JSON file.

Using Positional Matches

What would happen if we changed to #matchVariantsBy=position? Two things will happen. First, our positional variants will now match:

      "variants": [
{
"vid": "16-23603511-TG-T",
"chromosome": "16",
"begin": 23603512,
"end": 23603512,
"refAllele": "G",
"altAllele": "-",
"variantType": "deletion",
"hgvsg": "NC_000016.10:g.23603512delG",
"MyDataSource": [
{
"refAllele": "GA",
"altAllele": "-",
"allAf": 7e-06,
"pathogenicity": "P"
}
],
"clinvar": [

In addition, you will now see an extra flag for our allele-specific variant:

      "variants": [
{
"vid": "16-68801894-G-A",
"chromosome": "16",
"begin": 68801894,
"end": 68801894,
"refAllele": "G",
"altAllele": "A",
"variantType": "SNV",
"hgvsg": "NC_000016.10:g.68801894G>A",
"phylopScore": 1,
"MyDataSource": [
{
"refAllele": "G",
"altAllele": "A",
"allAf": 7e-06,
"pathogenicity": "LP",
"notes": "Seen in case 123",
"isAlleleSpecific": true
}
],
"clinvar": [

Genomic Region Example

Create the Custom Annotation TSV

In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. Consider the following example:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFENDnotes
#categories....
#descriptions....
#type...string
chr1620000000T70000000Lots of false positives in this region

Here's the full TSV file.

Let's go over what's new in this example:

  • Column 5 now has a field called notes. In essence, it looks exactly like column 7 from our previous example.
  • The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.

In the previous example we learned about positional matching vs allele-specific matching. For genomic regions, #matchVariantsBy=allele and #matchVariantsBy=position produce -the same result.

Annotate with Nirvana

Let's use the same VCF file as our previous example.

Investigate the Results

    {
"chromosome": "16",
"position": 23603511,
"refAllele": "TG",
"altAlleles": [
"T"
],
"cytogeneticBand": "16p12.2",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0,
"annotationOverlap": 0
}
],
"variants": [

Here's the full JSON file.

Reciprocal & Annotation Overlap

For all intervals, Nirvana internally calculates two overlaps: a variant overlap and an annotation overlap. Variant overlap is the percentage of the variant's length that is overlapped. Annotation overlap is the percentage of the annotation's length that is overlapped.

Reciprocal overlap is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is 1 bp, both overlaps will be pretty close to 0.

We will also see this annotation for the other variant on chr16:

    {
"chromosome": "16",
"position": 68801894,
"refAllele": "G",
"altAlleles": [
"A"
],
"cytogeneticBand": "16q22.1",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0,
"annotationOverlap": 0
}
],
"variants": [

Genomic Regions for Structural Variants Example

Create the Custom Annotation TSV

Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To force Nirvana to match regions only to other SVs, use the #matchVariantsBy=sv option in the header. Here is an example:

Col 1Col 2Col 3Col 4Col 5
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=sv
#CHROMPOSREFENDnotes
#categories....
#descriptions....
#type...string
chr1620000000T70000000Lots of false positives in this region

Here's the full TSV file.

Let's go over what's new in this example:

  • The main difference is the header field #matchVariantsBy=sv which indicates that only structural variants that overlap these genomic regions will receive annotations.

Annotate with Nirvana

Let's use a new VCF file. It contains the first variant from the previous file and a structural variant deletion, both of which overlap the given genomic region.

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
16 23603511 . TG T . . .
16 68801894 . G <DEL> . . END=73683789;SVTYPE=DEL

Here's the full VCF file.

Investigate the Results

Note that this time, MyDataSource only showed up for the <DEL> and not the deletion 16-23603511-TG-T.

    {
"chromosome": "16",
"position": 23603511,
"refAllele": "TG",
"altAlleles": [
"T"
],
"cytogeneticBand": "16p12.2",
"variants": [
...
...
{
"chromosome": "16",
"position": 68801894,
"svEnd": 73683789,
"refAllele": "G",
"altAlleles": [
"<DEL>"
],
"cytogeneticBand": "16q22.1-q22.3",
"MyDataSource": [
{
"start": 20000000,
"end": 70000000,
"notes": "Lots of false positives in this region",
"reciprocalOverlap": 0.02396,
"annotationOverlap": 0.02396
}
],
"variants": [

Mixing Small Variants and Genomic Regions

Create the Custom Annotation TSV

Previously we looked at examples that either had small variants or genomic regions. Let's create a file that contains both:

Col 1Col 2Col 3Col 4Col 5Col 6
#title=MyDataSource
#assembly=GRCh38
#matchVariantsBy=allele
#CHROMPOSREFALTENDnotes
#categories.....
#descriptions.....
#type....string
chr1623603511TGAT..
chr1668801894GA..
chr1911107436GA..
chr2110510818C.10699435Interval #1
chr2110510818C<DEL>10699435Interval #2
chr2212370388TT[chr22:12370729[.Known false-positive

Here's the full TSV file.

Let's go over what's new in this example:

  • Column 4 now has the ALT field. Except for the case listed below, this is only used by small variants or translocation breakends.
  • Column 5 now has the END field. This is only used by genomic regions.
  • There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? Interval #2 has a symbolic allele in the ALT column. When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). When Nirvana matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.

Annotate with Nirvana

Let's use a new VCF file to study how matching works for intervals #1 and #2:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
21 10510818 . C <DUP> . . END=10699435;SVTYPE=DUP
22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND

Here's the full VCF file.

The first variant is similar to the custom annotation labelled "interval #2". Position 10510818 is the padding base, so it effectively starts at position 10510819.

Investigate the Results

  "positions": [
{
"chromosome": "21",
"position": 10510818,
"svEnd": 10699435,
"refAllele": "C",
"altAlleles": [
"<DUP>"
],
"cytogeneticBand": "21p11.2",
"MyDataSource": [
{
"start": 10510818,
"end": 10699435,
"notes": "Interval #1",
"reciprocalOverlap": 0.99999,
"annotationOverlap": 0.99999
},
{
"start": 10510819,
"end": 10699435,
"notes": "Interval #2",
"reciprocalOverlap": 1,
"annotationOverlap": 1
}
],

Here's the full JSON file.

As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. Interval #1 technically starts 1 bp earlier, so its overlap is 99.999%.

Further down the JSON file, we find the annotated translocation breakend:

      "variants": [
{
"vid": "22-12370388-T-T[chr22:12370729[",
"chromosome": "22",
"begin": 12370388,
"end": 12370388,
"isStructuralVariant": true,
"refAllele": "T",
"altAllele": "T[chr22:12370729[",
"variantType": "translocation_breakend",
"MyDataSource": {
"refAllele": "T",
"altAllele": "T[chr22:12370729[",
"notes": "Known false-positive"
}
}

Gene File Format

Basic Gene Example

Create the Custom Annotation TSV

Previously we looked at examples that either had small variants or genomic regions, however, sometimes we would like to add custom gene annotations. The gene custom annotation file format -looks slightly different:

Col 1Col 2Col 3Col 4
#title=MyDataSource
#geneSymbolgeneIdphenotypenotes
#categories...
#descriptions...
#type.stringstring
TP537157Colorectal cancer, hereditary nonpolyposis, type 5.
KRASENSG00000133703Mismatch repair cancer syndromeSeen in cohort 123

Here's the full TSV file.

Let's go over what's in this example:

  • Column 2 has the geneId field. This can be either an Entrez Gene ID or an Ensembl ID.
Gene Symbols

Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Nirvana uses the geneId to match genes rather than the gene symbol. However, to -make the custom annotation files easier to read, we've included the geneSymbol column as well.

Unknown Gene IDs

When Nirvana parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Nirvana. In such a case, Nirvana will display an error showing all the -unrecognized gene IDs.

Annotate with Nirvana

Let's use a VCF file that contains variants in TP53 and KRAS:

##fileformat=VCFv4.1
#CHROM POS ID REF ALT QUAL FILTER INFO
12 25227255 . A T . . .
17 7675074 . C A . . .

Here's the full VCF file.

Investigate the Results

  "genes": [
{
"name": "KRAS",
"clingenGeneValidity": [
{
"diseaseId": "MONDO_0009026",
"disease": "Costello syndrome",
"classification": "disputed",
"classificationDate": "2018-07-24"
}
],
"clingenDosageSensitivityMap": {
"haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"
},
"gnomAD": {
"pLi": 0.000788,
"pRec": 0.789,
"pNull": 0.21,
"synZ": 0.336,
"misZ": 2.32,
"loeuf": 1.24
},
"MyDataSource": {
"phenotype": "Mismatch repair cancer syndrome",
"notes": "Seen in cohort 123"
}
},

This is the abbreviated output for KRAS. Here's the full JSON file if you want to see the complete KRAS entry.

Customizing the Header

Title

For the title, you can provide any string that hasn't already been used. The title should be unique.

caution

Make sure that the title does not conflict with other keys in the JSON file.

For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be vid, chromosome, transcripts, etc. The title should also not conflict with other data source keys like clinvar or gnomad.

For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be -chromosome, svLength, cytogeneticBand, etc. The title should also not conflict with other data source keys like clingen or dgv.

caution

Care should be taken not to annotate using multiple custom annotations that all use the same title.

Genome Assemblies

The following genome assemblies can be specified:

  • GRCh37
  • GRCh38

Matching Criteria

The matching criteria specify how Nirvana should match a VCF variant to the custom annotation.

The following matching criteria can be specified:

  • allele - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like gnomAD
  • position - use this when you want positional matches. This is commonly used with disease phenotype data sources like ClinVar
  • sv - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline -copy number intervals along the genome.

Categories

Categories are not used by Nirvana, but are often used by downstream tools. Categories provide hints for how those tools should filter or display -the annotation data.

When a category is specified, Nirvana will provide additional validation for those fields. The following table describes each category:

CategoryDescriptionValidation
AlleleCountallele counts for a specific populationSee the supported populations below
AlleleNumberallele numbers for a specific populationSee the supported populations below
AlleleFrequencyallele frequencies for a specific populationSee the supported populations below
PredictionACMG-style pathogenicity classificationsbenign (B)
likely benign (LB)
VUS
likely pathogenic (LP)
pathogenic (P)
Filterfree text that signals downstream tools to add the column to the filterMax 20 characters
Descriptionfree-text descriptionMax 100 characters
Identifierany IDMax 50 characters
HomozygousCountcount of homozygous individuals for a specific populationSee the supported populations below
Scoreany score valueAny double-precision floating point number

Descriptions

Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations.

Populations

The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD.

Population CodeSuper-population CodeDescription
ACBAFRAfrican Caribbeans in Barbados
AFRAFRAfrican
ALLALLAll populations
AMRAMRAd Mixed American
ASJAshkenazi Jewish
ASWAFRAmericans of African Ancestry in SW USA
BEBSASBengali from Bangladesh
CDXEASChinese Dai in Xishuangbanna, China
CEUEURUtah Residents (CEPH) with Northern and Western European Ancestry
CHBEASHan Chinese in Beijing, China
CHSEASSouthern Han Chinese
CLMAMRColombians from Medellin, Colombia
EASEASEast Asian
ESNAFREsan in Nigeria
EUREUREuropean
FINEURFinnish in Finland
GBREURBritish in England and Scotland
GIHSASGujarati Indian from Houston, Texas
GWDAFRGambian in Western Divisions in the Gambia
IBSEURIberian population in Spain
ITUSASIndian Telugu from the UK
JPTEASJapanese in Tokyo, Japan
KHVEASKinh in Ho Chi Minh City, Vietnam
LWKAFRLuhya in Webuye, Kenya
MAGAFRMandinka in the Gambia
MKKAFRMaasai in Kinyawa, Kenya
MSLAFRMende in Sierra Leone
MXLAMRMexican Ancestry from Los Angeles, USA
NFEEUREuropean (Non-Finnish)
OTHOTHOther
PELAMRPeruvians from Lima, Peru
PJLSASPunjabi from Lahore, Pakistan
PURAMRPuerto Ricans from Puerto Rico
SASSASSouth Asian
STUSASSri Lankan Tamil from the UK
TSIEURToscani in Italia
YRIAFRYoruba in Ibadan, Nigeria

Data Types

Each custom annotation can be one of the following data types:

  • bool - true or false
  • number - any integer or floating-point number
  • string - text
tip

For boolean variables, only keys with a true value will be output to the JSON object.

Using SAUtils

Nirvana includes a tool called SAUtils that converts various data sources into Nirvana's native binary format. The sub-commands customvar and customgene are used to specify a variant file or a gene file respectively.

Convert Variant File

dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i MyDataSource.tsv \
-o SupplementaryAnnotation
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input TSV path
  • the -o argument specifies the output directory

Convert Gene File

dotnet bin/Release/netcoreapp2.1/SAUtils.dll customgene \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-c Data/Cache \
-i MyDataSource.tsv \
-o SupplementaryAnnotation
  • the -c argument specifies the Nirvana cache path
  • the -i argument specifies the input TSV path
  • the -o argument specifies the output directory
- - - - \ No newline at end of file diff --git a/3.21/file-formats/nirvana-json-file-format/index.html b/3.21/file-formats/nirvana-json-file-format/index.html deleted file mode 100644 index 49319252..00000000 --- a/3.21/file-formats/nirvana-json-file-format/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Nirvana JSON File Format | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Nirvana JSON File Format

Overview

Conventions

In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. As such, we have several conventions that are useful to know about:

  • With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display "isStructuralVariant":false a few million times when annotating a small variant VCF.
  • When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. Nirvana treats periods like empty or null strings and therefore will not output those entries.

JSON Layout

info

In general, each position corresponds to a row in the original VCF file.

For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section.

Parsing

info

We've put together a new section that discusses how to parse our JSON files easily using examples in a Python Jupyter notebook and an R version as well. In addition, we have information about how to quickly dump content from our JSON file using a tabix-like utility called JASIX.

{
"header":{
"annotator":"Nirvana 3.0.0-alpha.5+g6c52e247",
"creationTime":"2017-06-14 15:53:13",
"genomeAssembly":"GRCh37",
"dataSources":[
{
"name":"OMIM",
"version":"unknown",
"description":"An Online Catalog of Human Genes and Genetic Disorders",
"releaseDate":"2017-05-03"
},
{
"name":"VEP",
"version":"84",
"description":"BothRefSeqAndEnsembl",
"releaseDate":"2017-01-16"
},
{
"name":"ClinVar",
"version":"20170503",
"description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",
"releaseDate":"2017-05-03"
},
{
"name":"phyloP",
"version":"hg19",
"description":"46 way conservation score between humans and 45 other vertebrates",
"releaseDate":"2009-11-10"
}
],
"samples":[
"NA12878",
"NA12891",
"NA12892"
]
},
FieldTypeNotes
annotatorstringthe name of the annotator and the current version
creationTimestringyyyy-MM-dd hh:mm:ss
genomeAssemblystringsee possible values below
schemaVersionintegerincremented whenever the core structure of the JSON file introduces breaking changes
dataVersionstring
dataSourcesobject arraysee Data Source entry below
samplesstring arraythe order of these sample names will be used throughout the JSON file when enumerating samples

Data Source

FieldTypeNotes
namestring
versionstring
descriptionstringoptional description of the data source
releaseDatestringyyyy-MM-dd

Genome Assemblies

  • GRCh37
  • GRCh38
  • hg19
  • SARSCoV2

Positions

"positions":[
{
"chromosome":"chr2",
"position":48010488,
"repeatUnit":"GGCCCC",
"refRepeatCount":3,
"svEnd":48020488,
"refAllele":"G",
"altAlleles":[
"A",
"GT"
],
"quality":461,
"filters":[
"PASS"
],
"ciPos":[
-170,
170
],
"ciEnd":[
-175,
175
],
"svLength":1000,
"strandBias":1.23,
"jointSomaticNormalQuality":29,
"cytogeneticBand":"2p16.3",
FieldTypeVariant TypeNotes
chromosomestringallexactly as displayed in the vcf
positionintegerallexactly as displayed in the vcf (1-based notation). Range: 1 - 250 million
repeatUnitstringSTRprovided by ExpansionHunter
refRepeatCountintegerSTRprovided by ExpansionHunter
svEndintegerSV
refAllelestringallexactly as displayed in the vcf
altAllelestring arrayallexactly as displayed in the vcf
qualityfloatallexactly as displayed in the vcf (Normally an integer, but some variant callers use floating point. Has been observed as high as 500k)
filtersstring arrayallexactly as displayed in the vcf
ciPosinteger arraySV
ciEndinteger arraySV
svLengthintegerSV
strandBiasfloatsmall variantprovided by GATK (from SB)
jointSomaticNormalQualityintegerSVprovided by the Manta variant caller (SOMATICSCORE)
cytogeneticBandstringalle.g. 17p13.1

ClinGen

"clingen":[
{
"chromosome":"17",
"begin":525,
"end":14667519,
"variantType":"copy_number_gain",
"id":"nsv996083",
"clinicalInterpretation":"pathogenic",
"observedGains":1,
"validated":true,
"phenotypes":[
"Intrauterine growth retardation"
],
"phenotypeIds":[
"HP:0001511",
"MedGen:C1853481"
],
"reciprocalOverlap":0.00131
},
{
"chromosome":"17",
"begin":45835,
"end":7600330,
"variantType":"copy_number_loss",
"id":"nsv869419",
"clinicalInterpretation":"pathogenic",
"observedLosses":1,
"validated":true,
"phenotypes":[
"Developmental delay AND/OR other significant developmental or morphological phenotypes"
],
"reciprocalOverlap":0.00254
}
]
FieldTypeNotes
clingenobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
variantTypestringAny of the sequence alterations defined here.
idstringIdentifier from the data source. Alternatively a VID
clinicalInterpretationstringsee possible values below
observedGainsintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
observedLossesintegerRange: 0 - (2^31 - 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.
validatedboolean
phenotypesstring arrayDescription of the phenotype.
phenotypeIdsstring arrayDescription of the phenotype IDs.
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).

clinicalInterpretation

  • benign
  • curated benign
  • curated pathogenic
  • likely benign
  • likely pathogenic
  • path gain
  • path loss
  • pathogenic
  • uncertain
"clingenDosageSensitivityMap": [{
"chromosome": "15",
"begin": 30900686,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 0.33994
},
{
"chromosome": "15",
"begin": 31727418,
"end": 32153204,
"haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",
"triplosensitivity": "dosage sensitivity unlikely",
"reciprocalOverlap": 0.00147,
"annotationOverlap": 1
}]
FieldTypeNotes
clingenDosageSensitivityMapobject array
chromosomestringEnsembl-style chromosome names
begininteger1-based position
endinteger1-based position
haploinsufficiencystringsee possible values below
triplosensitivitystring(same as haploinsufficiency) 
reciprocalOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).
annotationOverlapfloating pointRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap. Specified up to 5 decimal places (Not reported for Insertions).

haploinsufficiency and triplosensitivity

  • no evidence to suggest that dosage sensitivity is associated with clinical phenotype
  • little evidence suggesting dosage sensitivity is associated with clinical phenotype
  • emerging evidence suggesting dosage sensitivity is associated with clinical phenotype
  • sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype
  • gene associated with autosomal recessive phenotype
  • dosage sensitivity unlikely

1000 Genomes (SV)

"oneKg":[
{
"chromosome":"1",
"begin":1595369,
"end":1612441,
"variantType": "copy_number_variation",
"id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",
"allAn": 5008,
"allAc": 2702,
"allAf": 0.539537,
"afrAf": 0.6052,
"amrAf": 0.3675,
"eurAf": 0.5357,
"easAf": 0.5368,
"sasAf": 0.5797,
"reciprocalOverlap": 0.07555
}
],
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
idstring
allAnintegerallele number for all populations. Non-zero integer.
allAcintegerallele count for all populations. Integer.
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
sasAffloating pointallele frequency for the South Asian super population. Range: 0 - 1.0
reciprocalOverlapfloating pointrange: 0 - 1.

gnomAD (SV)

"gnomAD-preview": [
{
"chromosome": "1",
"begin": 40001,
"end": 47200,
"variantId": "gnomAD-SV_v2.1_DUP_1_1",
"variantType": "duplication",
"failedFilter": true,
"allAf": 0.068963,
"afrAf": 0.135694,
"amrAf": 0.022876,
"easAf": 0.01101,
"eurAf": 0.007846,
"othAf": 0.017544,
"femaleAf": 0.065288,
"maleAf": 0.07255,
"allAc": 943,
"afrAc": 866,
"amrAc": 21,
"easAc": 17,
"eurAc": 37,
"othAc": 2,
"femaleAc": 442,
"maleAc": 499,
"allAn": 13674,
"afrAn": 6382,
"amrAn": 918,
"easAn": 1544,
"eurAn": 4716,
"othAn": 114,
"femaleAn": 6770,
"maleAn": 6878,
"allHc": 91,
"afrHc": 90,
"amrHc": 1,
"easHc": 0,
"eurHc": 0,
"othHc": 55,
"femaleHc": 44,
"maleHc": 47,
"reciprocalOverlap": 0.01839,
"annotationOverlap": 0.16667
}
]

FieldTypeNotes
chromosomestringchromosome number
beginintegerposition interval start
endintegerposition interval end
variantTypestringstructural variant type
variantIdstringgnomAD ID
allAffloating pointallele frequency for all populations. Range: 0 - 1.0
afrAffloating pointallele frequency for the African super population. Range: 0 - 1.0
amrAffloating pointallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
easAffloating pointallele frequency for the East Asian super population. Range: 0 - 1.0
eurAffloating pointallele frequency for the European super population. Range: 0 - 1.0
othAffloating pointallele frequency for all other populations. Range: 0 - 1.0
femaleAffloating pointallele frequency for female population. Range: 0 - 1.0
maleAffloating pointallele frequency for male population. Range: 0 - 1.0
allAcintegerallele count for all populations.
afrAcintegerallele count for the African super population.
amrAcintegerallele count for the Ad Mixed American super population.
easAcintegerallele count for the East Asian super population.
eurAcintegerallele count for the European super population.
othAcintegerallele count for all other populations.
maleAcintegerallele count for male population.
femaleAcintegerallele count for female population.
allAnintegerallele number for all populations.
afrAnintegerallele number for the African super population.
amrAnintegerallele number for the Ad Mixed American super population.
easAnintegerallele number for the East Asian super population.
eurAnintegerallele number for the European super population.
othAnintegerallele number for all other populations.
femaleAnintegerallele number for female population.
maleAnintegerallele number for male population.
allHcintegercount of homozygous individuals for all populations.
afrHcintegercount of homozygous individuals for the African / African American population.
amrHcintegercount of homozygous individuals for the Latino population.
easHcintegercount of homozygous individuals for the East Asian population.
eurHcintegercount of homozygous individuals for the European super population.
othHcintegercount of homozygous individuals for all other populations.
maleHcintegercount of homozygous individuals for male population.
femaleHcintegercount of homozygous individuals for female population.
failedFilterbooleanTrue if this variant failed any filters (Note: we do not list the failed filters)
reciprocalOverlapfloating pointReciprocal overlap. Range: 0 - 1.0
annotationOverlapfloating pointAnnotation overlap. Range: 0 - 1.0

Note: Following fields are not available in GRCh38 because the source file does not contain this information:

Field
femaleAf
maleAf
maleAc
femaleAc
femaleAn
maleAn
allHc
afrHc
amrHc
easHc
eurHc
othHc
maleHc
femaleHc
failedFilter

MITOMAP (SV)

"mitomap":[ 
{
"chromosome":"MT",
"begin":3166,
"end":14152,
"variantType":"deletion",
"reciprocalOverlap":0.18068,
"annotationOverlap":0.42405
}
]
FieldTypeNotes
chromosomestring
begininteger
endinteger
variantTypestring
reciprocalOverlapfloatRange: 0 - 1. Specified up to 5 decimal places
annotationOverlapfloatRange: 0 - 1. Specified up to 5 decimal places

Samples

"samples":[
{
"genotype":"0/1",
"variantFrequencies":[
0.333,
0.5
],
"totalDepth":57,
"genotypeQuality":12,
"copyNumber":3,
"repeatUnitCounts":[
10,
20
],
"alleleDepths":[
10,
20,
30
],
"failedFilter":true,
"splitReadCounts":[
10,
20
],
"pairedEndReadCounts":[
10,
20
],
"isDeNovo":true,
"diseaseAffectedStatuses":[
"-"
],
"artifactAdjustedQualityScore":89.3,
"likelihoodRatioQualityScore":78.2,
"heteroplasmyPercentile":[
23.13,
12.65
]
}
]
FieldTypeVCFNotes
genotypestringGT
variantFrequenciesfloat arrayVF, ADrange: 0 - 1.0. One value per alternate allele
totalDepthintegerDPnon-negative integer values
genotypeQualityintegerGQnon-negative integer values. Typically maxes out at 99
copyNumberintegerCNnon-negative integer values
minorHaplotypeCopyNumberintegerMCNnon-negative integer values
repeatUnitCountsinteger arrayREPCNExpansionHunter-specific
alleleDepthsinteger arrayADnon-negative integer values
failedFilterboolFT
splitReadCountsinteger arraySRManta-specific
pairedEndReadCountsinteger arrayPRManta-specific
isDeNovoboolDN
deNovoQualityfloatDQ
diseaseAffectedStatusesstring arrayDSTExpansionHunter-specific
artifactAdjustedQualityScorefloatAQPEPE-specific. Range: 0 - 100.0
likelihoodRatioQualityScorefloatLQPEPE-specific. Range: 0 - 100.0
lossOfHeterozygosityboolCN, MCN
somaticQualityfloatSQ
heteroplasmyPercentilefloat arrayVFrange: 0 - 100. 2 decimal places. One value per alternate allele
binCountintegerBCnon-negative integer values
Empty Samples

If a sample does not contain any entries, we will create a sample object that contains the isEmpty key. This ensures that sample ordering is preserved while indicating that a sample is intentionally empty.

"samples":[
{
"isEmpty":true
}
],

Variants

"variants":[
{
"vid":"2:48010488:A",
"chromosome":"chr2",
"begin":48010488,
"end":48010488,
"isReferenceMinorAllele":true,
"isStructuralVariant":true,
"refAllele":"G",
"altAllele":"A",
"variantType":"SNV",
"isDecomposedVariant":true,
"isRecomposedVariant":true,
"linkedVids":["2:48010488:GTA:ATC"],
"hgvsg":"NC_000002.11:g.48010488G>A",
"phylopScore":0.459
FieldTypeNotes
vidstringsee Variant Identifiers
chromosomestring
beginint1-based non-negative integer values. Range: 1 - 250 million
endint1-based non-negative integer values. Range: 1 - 250 million
isReferenceMinorAllelebooltrue when this is a reference minor allele
isStructuralVariantbooltrue when the variant is a structural variant
inLowComplexityRegionbooltrue when the variant lies in a low complexity region (gnomAD low complexity regions)
refAllelestringparsimonious representation of the reference allele
altAllelestringparsimonious representation of the alternate allele.
variantTypestringuses Sequence Ontology sequence alterations
isDecomposedVariantbooltrue when the decomposed variant has been used to create another recomposed variant
isRecomposedVariantbooltrue when the variant is recomposed from two or more decomposed variants
linkedVidsstring arraylist of VIDs for variants connecting decomposed and recomposed variants
hgvsgstringHGVS g. notation
phylopScorefloatphyloP conservation score. Range: -14.08 to 6.424
Reference Minor Alleles

Nirvana supports annotating reference minor alleles. In such a case, refAllele will be replaced by the global major allele and altAllele will be replaced with the original reference allele.

Flagging Decomposed & Recomposed Variants

When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with "isDecomposedVariant":true.

Similarly, the recomposed variant will be shown as a new VCF position. This recomposed variant will be flagged with "isRecomposedVariant":true.

Transcripts

"transcripts":[
{
"transcript":"ENST00000445503.1",
"source":"Ensembl",
"bioType":"nonsense_mediated_decay",
"codons":"gGg/gAg",
"aminoAcids":"G/E",
"cdnaPos":"268",
"cdsPos":"116",
"exons":"1/9",
"introns":"1/8",
"proteinPos":"39",
"geneId":"ENSG00000116062",
"hgnc":"MSH6",
"consequence":[
"missense_variant",
"NMD_transcript_variant"
],
"hgvsc":"ENST00000445503.1:c.116G>A",
"hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",
"geneFusion":{
"exon":6,
"intron":5,
"fusions":[
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",
"exon":3,
"intron":2
},
{
"hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",
"exon":2,
"intron":1
}
]
},
"isCanonical":true,
"polyPhenScore":0.95,
"polyPhenPrediction":"probably damaging",
"proteinId":"ENSP00000405294.1",
"siftScore":0.61,
"siftPrediction":"tolerated",
"completeOverlap":true
}
]
FieldTypeNotes
transcriptstringtranscript ID. e.g. ENST00000445503.1
sourcestringRefSeq / Ensembl
bioTypestringdescriptions of the biotypes from Ensembl
codonsstring
aminoAcidsstring
cdnaPosstring
cdsPosstring
exonsstringexons affected by the variant
intronsstringintrons affected by the variant
proteinPosstring
geneIdstringgene ID. e.g. ENSG00000116062
hgncstringgene symbol. e.g. MSH6
consequencestring arraySequence Ontology Consequences
hgvscstringHGVS coding nomenclature
hgvspstringHGVS protein nomenclature
geneFusionobjectsee Gene Fusions entry below
isCanonicalbooltrue when this is a canonical transcript
polyPhenScorefloatrange: 0 - 1.0
polyPhenPredictionstringsee possible values below
proteinIdstringprotein ID. E.g. ENSP00000405294.1
siftScorefloatrange: 0 - 1.0
siftPredictionstringsee possible values below
completeOverlapbooltrue when this transcript is completely overlapped by the variant
cancerHotspotsstring arraysee Cancer Hotspots entry below

PolyPhen

  • probably damaging
  • possibly damaging
  • benign
  • unknown

SIFT

  • tolerated
  • deleterious
  • tolerated - low confidence
  • deleterious - low confidence

Amino Acid Conservation

"aminoAcidConservation": {
"scores": [0.34]
}
FieldTypeNotes
aminoAcidConservationobject
scoresobject array of doublespercent conserved with respect to human amino acid residue. Range: 0.01 - 1.00

Gene Fusions

FieldTypeNotes
exonintactual exon where the breakpoint was located
intronintactual intron where the breakpoint was located
fusionsobject arraysee Fusion entry below

Fusion

FieldTypeNotes
exonintactual exon where the other breakpoint was located
intronintactual intron where the other breakpoint was located
hgvscstringHGVS coding nomenclature describing the two genes and the transcripts that are fused, along with their coding positions

Cancer Hotspots

FieldTypeNotes
residuestring
numSamplesinthow many samples are associated with a variant at the same amino acid position
numAltAminoAcidSamplesinthow many samples are associated with a variant with the same position and alternate amino acid
qValuedouble

Regulatory Regions

"regulatoryRegions":[
{
"id":"ENSR00001542175",
"type":"promoter",
"consequence":[
"regulatory_region_variant"
]
}
]
FieldTypeNotes
idstring
typestringsee possible values below
consequencestring arraysee possible values below

Regulatory Types

  • CTCF_binding_site
  • enhancer
  • open_chromatin_region
  • promoter
  • promoter_flanking_region
  • TF_binding_site

Regulatory Consequences

  • regulatory_region_variant
  • regulatory_region_ablation
  • regulatory_region_amplification
  • regulatory_region_truncation

ClinVar

small variants:

"clinvar":[
{
"id":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"significance":[
"benign"
],
"refAllele":"G",
"altAllele":"A",
"lastUpdatedDate":"2020-03-01",
"isAlleleSpecific":true
},
{
"id":"RCV000030258.4",
"variationId":"VCV000036581.3",
"reviewStatus":"reviewed by expert panel",
"alleleOrigins":[
"germline"
],
"refAllele":"G",
"altAllele":"A",
"phenotypes":[
"Lynch syndrome"
],
"medGenIds":[
"C1333990"
],
"omimIds":[
"120435"
],
"significance":[
"benign"
],
"lastUpdatedDate":"2017-05-01",
"isAlleleSpecific":true
}
]

large variants:

"clinvar":[
{
"chromosome":"1",
"begin":629025,
"end":8537745,
"variantType":"copy_number_loss",
"id":"RCV000051993.4",
"variationId":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"alleleOrigins":[
"not provided"
],
"phenotypes":[
"See cases"
],
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21",
"pubMedIds":[
"21844811"
]
},
{
"id":"VCV000058242.1",
"reviewStatus":"criteria provided, single submitter",
"significance":[
"pathogenic"
],
"lastUpdatedDate":"2022-04-21"
},
......
]
FieldTypeNotes
idstringClinVar ID
variationIdstringClinVar VCV ID
variantTypestringvariant type
reviewStatusstringsee possible values below
alleleOriginsstring arraysee possible values below
refAllelestring
altAllelestring
phenotypesstring array
medGenIdsstring arrayMedGen IDs
omimIdsstring arrayOMIM IDs
orphanetIdsstring arrayOrphanet IDs
significancestring arraysee possible values below
lastUpdatedDatestringyyyy-MM-dd
pubMedIdsstring arrayPubMed IDs
isAlleleSpecificbooltrue when the current variant alternate allele matches the ClinVar alternate allele

reviewStatus:

  • no assertion provided
  • no assertion criteria provided
  • criteria provided, single submitter
  • practice guideline
  • classified by multiple submitters
  • criteria provided, conflicting interpretations
  • criteria provided, multiple submitters, no conflicts
  • no interpretation for the single variant

alleleOrigins:

  • unknown
  • other
  • germline
  • somatic
  • inherited
  • paternal
  • maternal
  • de-novo
  • biparental
  • uniparental
  • not-tested
  • tested-inconclusive

significance:

  • uncertain significance
  • not provided
  • benign
  • likely benign
  • likely pathogenic
  • pathogenic
  • drug response
  • histocompatibility
  • association
  • risk factor
  • protective
  • affects
  • conflicting data from submitters
  • other
  • no interpretation for the single variant
  • conflicting interpretations of pathogenicity

1000 Genomes

"oneKg":{
"allAf":0.200879,
"afrAf":0.210287,
"amrAf":0.139769,
"easAf":0.275794,
"eurAf":0.181909,
"sasAf":0.173824,
"allAn":5008,
"afrAn":1322,
"amrAn":694,
"easAn":1008,
"eurAn":1006,
"sasAn":978,
"allAc":1006,
"afrAc":278,
"amrAc":97,
"easAc":278,
"eurAc":183,
"sasAc":170
}
FieldTypeNotes
allAffloatallele frequency for all populations. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
allAnintallele number for all populations. Non-zero integer.
afrAffloatallele frequency for the African super population. Range: 0 - 1.0
afrAcintallele count for the African super population. Integer.
afrAnintallele number for the African super population. Non-zero integer.
amrAffloatallele frequency for the Ad Mixed American super population. Range: 0 - 1.0
amrAcintallele count for the Ad Mixed American super population. Integer.
amrAnintallele number for the Ad Mixed American super population. Non-zero integer.
easAffloatallele frequency for the East Asian super population. Range: 0 - 1.0
easAcintallele count for the East Asian super population. Integer.
easAnintallele number for the East Asian super population. Non-zero integer.
eurAffloatallele frequency for the European super population. Range: 0 - 1.0
eurAcintallele count for the European super population. Integer.
eurAnintallele number for the European super population. Non-zero integer.
sasAffloatallele frequency for the South Asian super population. Range: 0 - 1.0
sasAcintallele count for the South Asian super population. Integer.
sasAnintallele number for the South Asian super population. Non-zero integer.

DANN

"dannScore": 0.27
FieldTypeNotes
dannScorefloatRange: 0 - 1.0

dbSNP

"dbsnp":[
"rs1042821"
]
FieldTypeNotes
dbsnpstring arraydbSNP rsIDs

DECIPHER

"decipher":[
{
"chromosome":"1",
"begin":13516,
"end":91073,
"numDeletions":27,
"deletionFrequency":0.675,
"numDuplications":27,
"duplicationFrequency":0.675,
"sampleSize":40,
"reciprocalOverlap": 0.27555,
"annotationOverlap": 0.5901
}
],
FieldTypeNotes
chromosomestringEnsembl-style chromosome names
beginint1-based position
endint1-based position
numDeletionsint# of observed deletions
deletionFrequencyfloatdeletion frequency
numDuplicationsint# of observed duplications
duplicationFrequencyfloatduplication frequency
sampleSizeinttotal # of samples
reciprocalOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap
annotationOverlapfloatRange: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap

GERP

"gerpScore": 1.27
FieldTypeNotes
gerpScorefloatRange: -∞ to +∞

GME Variome

"gmeVariome":{
"allAc":10,
"allAn":202,
"allAf":0.049504,
"failedFilter":true
}
FieldTypeNotes
allAcintGME allele count
allAnintGME allele number
allAffloatGME allele frequency
failedFilterboolTrue if this variant failed any filters

gnomAD

"gnomad":{ 
"coverage":20,
"allAf":0.190317,
"maleAf":0.193,
"femaleAf": 0.1935,
"afrAf":0.222876,
"amrAf":0.121394,
"easAf":0.239802,
"finAf":0.136833,
"nfeAf":0.181282,
"asjAf":0.258278,
"othAf":0.186094,
"allAn":30796,
"maleAn":15096,
"femaleAn":15700,
"afrAn":8664,
"amrAn":832,
"easAn":1618,
"finAn":3486,
"nfeAn":14916,
"asjAn":302,
"othAn":978,
"allAc":5861,
"maleAc":2930,
"femaleAc": 2931,
"afrAc":1931,
"amrAc":101,
"easAc":388,
"finAc":477,
"nfeAc":2704,
"asjAc":78,
"othAc":182,
"allHc":561,
"afrHc":208,
"amrHc":6,
"easHc":42,
"finHc":31,
"nfeHc":242,
"asjHc":13,
"othHc":19,
"maleHc":280,
"femaleHc":281,
"controlsAllAf":0.190317,
"controlsAllAn":30796,
"controlsAllAc":5861,
"lowComplexityRegion":true,
"failedFilter":true
}
FieldTypeNotes
coverageintaverage coverage (non-negative integer values)
allAffloatallele frequency for all populations. Range: 0 - 1.0
maleAffloatallele frequency for male population. Range: 0 - 1.0
femaleAffloatallele frequency for female population. Range: 0 - 1.0
controlsAllAffloatallele frequency for the controls subset. Range: 0 - 1.0
allAcintallele count for all populations. Integer.
maleAcintallele count for male population. Integer.
femaleAcintallele count for female population. Integer.
controlsAllAcintallele count for the controls subset. Integer.
allAnintallele number for all populations. Non-zero integer.
maleAnintallele number for male population. Non-zero integer.
femaleAnintallele number for female population. Non-zero integer.
controlsAllAnintallele number for the controls subset. Non-zero integer.
allHcintcount of homozygous individuals for all populations. Non-negative integer.
maleHcintcount of homozygous individuals for male population. Non-negative integer.
femaleHcintcount of homozygous individuals for female population. Non-negative integer.
afrAffloatallele frequency for the African / African American population. Range: 0 - 1.0
afrAcintallele count for the African / African American population. Integer.
afrAnintallele number for the African / African American population. Non-zero integer.
afrHcintcount of homozygous individuals for African / African American population. Non-negative integer.
amrAffloatallele frequency for the Latino population. Range: 0 - 1.0
amrAcintallele count for the Latino population. Integer.
amrAnintallele number for the Latino population. Non-zero integer.
amrHcintcount of homozygous individuals for Latino population. Non-negative integer.
easAffloatallele frequency for the East Asian population. Range: 0 - 1.0
easAcintallele count for the East Asian population. Integer.
easAnintallele number for the East Asian population. Non-zero integer.
easHcintcount of homozygous individuals for East Asian population. Non-negative integer.
finAffloatallele frequency for the Finnish population. Range: 0 - 1.0
finAcintallele count for the Finnish population. Integer.
finAnintallele number for the Finnish population. Non-zero integer.
finHcintcount of homozygous individuals for Finnish population. Non-negative integer
nfeAffloatallele frequency for the Non-Finnish European population. Range: 0 - 1.0
nfeAcintallele count for the Non-Finnish European population. Integer.
nfeAnintallele number for the Non-Finnish European population. Non-zero integer.
nfeHcintcount of homozygous individuals for Non-Finnish European population. Non-negative integer
othAffloatallele frequency for the Other population. Range: 0 - 1.0
othAcintallele count for the Other population. Integer.
othAnintallele number for the Other population. Non-zero integer.
othHcintcount of homozygous individuals for Other population. Non-negative integer
asjAffloatallele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0
asjAcintallele count for the Ashkenazi Jewish population. Integer.
asjAnintallele number for the Ashkenazi Jewish population. Non-zero integer.
asjHcintcount of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer
sasAffloatallele frequency for the South Asian population. Range: 0 - 1.0
sasAcintallele count for the South Asian population. Integer.
sasAnintallele number for the South Asian population. Non-zero integer.
sasHcintcount of homozygous individuals for the South Asian population. Non-negative integer.
failedFilterboolTrue if this variant failed any filters (Note: we do not list the failed filters)
lowComplexityRegionboolTrue if this variant is located in a low complexity region.

MITOMAP

"mitomap":[ 
{
"refAllele":"G",
"altAllele":"A",
"diseases":[
"Bipolar disorder",
"Melanoma"
],
"hasHomoplasmy":false,
"hasHeteroplasmy":true,
"status":"Reported",
"clinicalSignificance":"confirmed pathogenic",
"scorePercentile":83.30,
"numGenBankFullLengthSeqs":2,
"pubMedIds":["2316527","6299878","6301949"],
"isAlleleSpecific":true
}
]
FieldTypeNotes
refAllelestring
altAllelestring
diseasesstring arrayassociated diseases
hasHomoplasmyboolean
hasHeteroplasmyboolean
statusstringrecord status
clinicalSignificancestringpredicted pathogenicity
scorePercentilefloatMitoTIP score
numGenBankFullLengthSeqsinteger# of GenBank full-length sequences
pubMedIdsstring array
isAlleleSpecificbooleantrue when the current variant alternate allele matches the MITOMAP alternate allele

Primate AI

"primateAI":[
{
"hgnc":"TP53",
"scorePercentile":0.3
}
]
FieldTypeNotes
hgncstring
scorePercentilefloatrange: 0 - 1.0

REVEL

"revel":{ 
"score":0.027
}
FieldTypeNotes
scorefloatRange: 0 - 1.0

Splice AI

"spliceAI":[ 
{
"hgnc":"BLCAP",
"acceptorGainDistance":-3,
"acceptorGainScore":0.3,
"donorLossDistance":7,
"donorLossScore":0.9
},
{
"hgnc":"NNAT",
"acceptorGainDistance":-1,
"acceptorGainScore":0.2,
"donorGainDistance":-2,
"donorGainScore":0.3
}
]
FieldTypeNotes
hgncstringHGNC gene symbol
acceptorGainDistanceint± bp from current position
acceptorGainScorefloatrange: 0 - 1.0. 1 decimal place
acceptorLossDistanceint± bp from current position
acceptorLossScorefloatrange: 0 - 1.0. 1 decimal place
donorGainDistanceint± bp from current position
donorGainScorefloatrange: 0 - 1.0. 1 decimal place
donorLossDistanceint± bp from current position
donorLossScorefloatrange: 0 - 1.0. 1 decimal place

TOPMed

"topmed":{ 
"allAc":20,
"allAn":125568,
"allAf":0.000159,
"allHc":0,
"failedFilter":true
}
FieldTypeNotes
allAcintTOPMed allele count
allAnintTOPMed allele number. Non-zero integer.
allAffloatTOPMed allele frequency (computed by Nirvana)
allHcintTOPMed homozygous count
failedFilterboolTrue if this variant failed any filters

Genes

Nirvana reports gene annotations for all genes that have an overlapping variant with the exception of flanking variants (i.e. variants that only cause upstream_gene_variant or downstream_gene_variant).

"genes":[
{
"name":"MSH6",
"hgncId":7329,
"summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",
/* this is where gene-level data sources can be found e.g. OMIM */
}
]
FieldTypeNotes
namestringHGNC gene symbol
hgncIdintHGNC ID
summarystringshort description of the gene from OMIM

OMIM

"omim":[ 
{
"mimNumber":600678,
"geneName":"MutS, E. coli, homolog of, 6",
"description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",
"phenotypes":[
{
"mimNumber":614350,
"phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",
"description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal dominant"
]
},
{
"mimNumber":608089,
"phenotype":"Endometrial cancer, familial",
"mapping":"molecular basis of the disorder is known"
},
{
"mimNumber":276300,
"phenotype":"Mismatch repair cancer syndrome",
"description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",
"mapping":"molecular basis of the disorder is known",
"inheritances":[
"Autosomal recessive"
],
"comments" : [
"contribute to susceptibility to multifactorial disorders or to susceptibility to infection",
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
FieldTypeNotes
mimNumberintOMIM ID for gene
geneNamestringgene name
descriptionstring
phenotypesobject arraysee Phenotype entry below

Phenotype

FieldTypeNotes
mimNumberint
phenotypestring
descriptionstring
mappingstringsee possible values below
inheritancesstring arraysee possible values below
commentsstring arraysee possible values below

Mapping

  1. disorder was positioned by mapping of the wild type gene
  2. disease phenotype itself was mapped
  3. molecular basis of the disorder is known
  4. disorder is a chromosome deletion or duplication syndrome

Inheritance

  • autosomal recessive
  • autosomal dominant

Comments

  • contributes to the susceptibility to multifactorial disorders
  • variations that lead to apparently abnormal laboratory test values
  • unconfirmed mapping

gnomAD LoF Gene Metrics

"gnomAD":{ 
"pLi":1.00e0,
"pNull":8.94e-40,
"pRec":1.84e-16,
"synZ":-8.44e-2,
"misZ":5.96e-1,
"loeuf":1.13e0
}
FieldTypeNotes
pLifloatprobability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)
pNullfloatprobability of being completely tolerant of loss of function variation (observed = expected)
pRecfloatprobability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)
synZfloatcorrected synonymous Z score
misZfloatcorrected missense Z score
loeuffloatloss of function observed/expected upper bound fraction (LOEUF)

ClinGen Disease Validity

"clingenGeneValidity":[
{
"diseaseId":"MONDO_0007893",
"disease":"Noonan syndrome with multiple lentigines",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
},
{
"diseaseId":"MONDO_0015280",
"disease":"cardiofaciocutaneous syndrome",
"classification":"no reported evidence",
"classificationDate":"2018-06-07"
}
]
FieldTypeNotes
clingenGeneValidityobject
diseaseIdstringMonarch Disease Ontology ID (MONDO)
diseasestringdisease label
classificationstringsee below for possible values
classificationDatestringyyyy-MM-dd

classification

  • no reported evidence
  • disputed
  • limited
  • moderate
  • definitive
  • strong
  • refuted
  • no known disease relationship

COSMIC Cancer Gene Census

   {
"name": "PRDM16",
"hgncId": 14000,
"ncbiGeneId": "63976",
"ensemblGeneId": "ENSG00000142611",
"cosmic": {
"roleInCancer": [
"oncogene",
"fusion"
]
}
}
FieldTypeNotes
roleInCancerstring arrayPossible roles in cancer
- - - - \ No newline at end of file diff --git a/3.21/index.html b/3.21/index.html deleted file mode 100644 index 16f7412f..00000000 --- a/3.21/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Introduction | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs (including CNVs)). It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation.

The inputs to Nirvana are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Nirvana handles multiple alternate alleles and multiple samples with ease.

The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily.

Fun Fact

Nirvana is a backronym for NImble and Robust VAriant aNnotAtor

What does Nirvana annotate?

We use Sequence Ontology consequences to describe how each variant impacts a given transcript:

In addition, we also use external data sources to provide additional context for each variant:

Licensing

Code

Nirvana source code is provided under the GPLv3 license. Nirvana includes several third party packages provided under other open source licenses, please see Dependencies for additional details.

Data

The data used by Nirvana is publicly available, however some data sources have special restrictions on use by non-academic entities.

Nirvana Team

Active Team

The Nirvana team works on the core functionality, AWS annotation services, in addition to keeping the annotation data sources up-to-date.

Current members of the Nirvana team are listed in alphabetical order below.

Fahd Siddiqui

Joined our team back in December 2021 and brings even more cloud and ML experience to our team.

Joseph Platzer

Test Lead. Joins Nirvana with a history of building sequencing tools and keeping the customer first.

Michael Strömberg

Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it.

Ningxin Ouyang

Our newest addition to the team with a wealth of experience in transcription factor footprinting.

Rajat Shuvro Roy

Lead developer. Loves to speed up things and make services available to all interested users.

Honorary Alumni

Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things.

Haochen Li

Detail-oriented quick thinker that keeps cool even in the most stressful situations. Now working as a Senior Bioinformatics Data Scientist at GRAIL.

Julien Lajugie

Julien is a legend around these parts. When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place.

Shuli Kang

Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies.

Yu Jiang

Biostatistics genius from Duke University before joining our team at Illumina. Now working as a Research Engineer at Facebook AI Research.
- - - - \ No newline at end of file diff --git a/3.21/introduction/covid19/index.html b/3.21/introduction/covid19/index.html deleted file mode 100644 index 3df56081..00000000 --- a/3.21/introduction/covid19/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Annotating COVID-19 | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Annotating COVID-19

The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.

However, nothing in our architecture prevents us from supporting other genomes. Earlier this year, we had an opportunity to put that statement to the test - we added support for annotating the SARS-CoV-2 genome, the virus that causes the COVID-19 disease.

In addition to normal transcript annotation, we also supply:

  • allele frequencies
  • protein domains
SARS-CoV-2 Galaxy Project

The allele frequencies used by Nirvana were provided by the SARS-CoV-2 Galaxy Project. This is an international effort that provides ongoing analysis of COVID-19 using Galaxy, BioConda, and public research infrastructures.

Getting Nirvana

If you don't have Nirvana already, please consult our Getting Started page first.

Downloading the COVID-19 data files

Here's a data zip file containing new gene models, reference, and external data sources for SARS-CoV-2:

Just go to the directory that contains your Nirvana Data directory.

cd ~/Nirvana
curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip
unzip Covid19Data.zip

Download a COVID-19 VCF file

Here's a COVID-19 VCF file you can play around with:

curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz

Running Nirvana

Once you have downloaded the data sets, use the following command to annotate your VCF:

dotnet bin/Release/netcoreapp2.1/Nirvana.dll \
-c Data/Cache/SARS-CoV-2/SARS-CoV-2 \
--sd Data/SupplementaryAnnotation/SARS-CoV-2 \
-r Data/References/SARS-CoV-2.ASM985889v3.dat \
-i Covid19Mutations.vcf.gz \
-o Covid19Mutations
  • the -c argument specifies the cache prefix
  • the --sd argument specifies the supplementary annotation directory
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input VCF path
  • the -o argument specifies the output filename prefix

When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:

---------------------------------------------------------------------------
Nirvana (c) 2020 Illumina, Inc.
Stromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:00.0
SA Position Scan 00:00:00.0 1763

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
NC_045512 00:00:00.0 00:00:00.1 173

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:00.0 2.0 %
Preload 00:00:00.0 0.3 %
Annotation 00:00:00.1 6.0 %

Time: 00:00:01.5

The output will be a JSON file called Covid19Mutations.json.gz. Here's the full JSON file.

Investigating the Results

Here's an example of what a COVID-19 variant looks like in the JSON output:

{
"chromosome":"NC_045512.2",
"position":27323,
"refAllele":"C",
"altAlleles":[
"T"
],
"filters":[
"PASS"
],
"proteinDomains":[
{
"start":27202,
"end":27384,
"proteinId":"YP_009724394.1",
"domainId":"cl13556",
"domainName":"Sars6 super family",
"reciprocalOverlap":0.00546,
"annotationOverlap":0.00546
}
],
"variants":[
{
"vid":"NC_045512.2-27323-C-T",
"chromosome":"NC_045512.2",
"begin":27323,
"end":27323,
"refAllele":"C",
"altAllele":"T",
"variantType":"SNV",
"hgvsg":"NC_045512.2:g.27323C>T",
"alleleFrequency":{
"refAllele":"C",
"altAllele":"T",
"allAc":8,
"allAn":1058,
"allAf":0.007561
},
"transcripts":[
{
"transcript":"YP_009724394.1",
"source":"RefSeq",
"bioType":"protein_coding",
"codons":"tCt/tTt",
"aminoAcids":"S/F",
"cdnaPos":"122",
"cdsPos":"122",
"exons":"1/1",
"proteinPos":"41",
"geneId":"43740572",
"hgnc":"ORF6",
"consequence":[
"missense_variant"
],
"hgvsc":"YP_009724394.1:c.122C>T",
"hgvsp":"YP_009724394.1:p.(Ser41Phe)",
"proteinId":"YP_009724394.1"
},
{
"transcript":"YP_009724395.1",
"source":"RefSeq",
"bioType":"protein_coding",
"geneId":"43740573",
"hgnc":"ORF7a",
"consequence":[
"upstream_gene_variant"
],
"proteinId":"YP_009724395.1"
}
]
}
]
}
- - - - \ No newline at end of file diff --git a/3.21/introduction/dependencies/index.html b/3.21/introduction/dependencies/index.html deleted file mode 100644 index 3fe6816a..00000000 --- a/3.21/introduction/dependencies/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Dependencies | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Dependencies

All of the following dependencies have been included in this repository.

NameLicenseUsage
Amazon.LambdaApacheAWS extensions for .NET CLI
AWSSDKApacheAWS Lambda, S3, SNS support
Json.NETMITJASIX utility
libdeflateMITBlockCompression library
MoqBSDMocking framework for unit tests
NDesk.OptionsMIT/X11CommandLine library
xUnitApacheUnit testing framework
zlib-ngzlibBlockCompression library
zstdBSDBlockCompression library
- - - - \ No newline at end of file diff --git a/3.21/introduction/getting-started/index.html b/3.21/introduction/getting-started/index.html deleted file mode 100644 index f3d7298a..00000000 --- a/3.21/introduction/getting-started/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Getting Started | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Getting Started

Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.

tip

Nirvana currently uses .NET6.0. Please make sure that you have the most current runtime from the .NET Core downloads page.

Getting Nirvana

Latest Release

Contact the team to obtain the latest release.

GitHub Release Notes

Alternatively, you can grab the previous binaries from our GitHub Releases page:

mkdir -p Nirvana/Data
cd Nirvana
unzip Nirvana-3.18.1-net6.0.zip

Quick Start

If you want to get started right away, we've created a script that unzips the Nirvana build, downloads the annotation data, and starts annotating a test file:

bash ./TestNirvana.sh NirvanaBuild.zip

We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X.

Docker

You can find us on Docker Hub under annotation/nirvana:

caution

We think Docker is fantastic. However, because our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Nirvana in Docker.

mkdir -p Nirvana/Data
cd Nirvana
docker pull annotation/nirvana:3.14

For Docker, we have special instructions for running the Downloader:

sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \
/opt/nirvana/Downloader.dll --ga GRCh37 -o /scratch

Similarly, we have special instructions for running Nirvana (Here's a toy VCF in case you need it):

sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \
/opt/nirvana/Nirvana.dll -c /scratch/Cache/GRCh37/Both \
-r /scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \
--sd /scratch/SupplementaryAnnotation/GRCh37 \
-i /scratch/HiSeq.10000.vcf.gz -o /scratch/HiSeq

Downloading the data files

To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:

dotnet bin/Release/net6.0/Downloader.dll \
--ga GRCh37 \
-o Data
  • the --ga argument specifies the genome assembly which can be GRCh37, GRCh38, or both.
  • the -o argument specifies the output directory
Glitches in the Matrix

Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. If you see that a file was marked truncated, try fixing the root cause and running the downloader again.

tip

From time to time, you can re-run the Downloader to get the latest annotation files. It will only download the files that changed.

Download a test VCF file

Here's a toy VCF file you can play around with:

curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz

Running Nirvana

Once you have downloaded the data sets, use the following command to annotate your VCF:

dotnet bin/Release/net6.0/Nirvana.dll \
-c Data/Cache \
--sd Data/SupplementaryAnnotation/GRCh37 \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i HiSeq.10000.vcf.gz \
-o HiSeq.10000
  • the -c argument specifies the cache directory
  • the --sd argument specifies the supplementary annotation directory
  • the -r argument specifies the compressed reference path
  • the -i argument specifies the input VCF path
  • the -o argument specifies the output filename prefix

When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:

---------------------------------------------------------------------------
Nirvana (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0
---------------------------------------------------------------------------

Initialization Time Positions/s
---------------------------------------------------------------------------
Cache 00:00:00.0
SA Position Scan 00:00:00.0 153,634

Reference Preload Annotation Variants/s
---------------------------------------------------------------------------
chr1 00:00:00.2 00:00:00.8 11,873

Summary Time Percent
---------------------------------------------------------------------------
Initialization 00:00:00.0 1.5 %
Preload 00:00:00.2 4.9 %
Annotation 00:00:00.8 18.5 %

Time: 00:00:04.4

The output will be a JSON file called HiSeq.10000.json.gz. Here's the full JSON file.

The Nirvana command line

The full command line options can be viewed by using the -h option or no options

dotnet bin/Release/net6.0/Nirvana.dll
---------------------------------------------------------------------------
Nirvana (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0
---------------------------------------------------------------------------

USAGE: dotnet Nirvana.dll -i <vcf path> -c <cache dir> --sd <sa dir> -r <ref path> -o <base output filename>
Annotates a set of variants

OPTIONS:
--cache, -c <directory>
input cache directory
--in, -i <path> input VCF path
--out, -o <file path> output file path
--ref, -r <path> input compressed reference sequence path
--sd <directory> input supplementary annotation directory
--sources, -s <VALUE> annotation data sources to be used (comma
separated list of supported tags)
--force-mt forces to annotate mitochondrial variants
--legacy-vids enables support for legacy VIDs
--enable-dq report DQ from VCF samples field
--enable-bidirectional-fusions
enables support for bidirectional gene fusions
--str <VALUE> user provided STR annotation TSV file
--vcf-info <VALUE> additional vcf info field keys (comma separated)
desired in the output
--vcf-sample-info <VALUE>
additional vcf format field keys (comma separated)
desired in the output
--help, -h displays the help menu
--version, -v displays the version

Supplementary annotation version: 69, Reference version: 7

Specifying annotation sources

By default, Nirvana will use all available data sources. However, the user can customize the set of sources using the --sources|-s option. If an unknown source is specified, a warning message will be printed.

dotnet bin/Release/net6.0/Nirvana.dll \
-c Data/Cache/GRCh37 \
--sd Data/SupplementaryAnnotation/GRCh37 \
-r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \
-i HiSeq.10000.vcf.gz \
-o HiSeq.10000 \
-s omim,gnomad,ense
---------------------------------------------------------------------------
Nirvana (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0
---------------------------------------------------------------------------

WARNING: Unknown tag in data-sources: ense.
Available values are: aminoAcidConservation,primateAI,dbsnp,spliceAI,revel,cosmic,clinvar,gnomad,
mitomap,oneKg,gmeVariome,topmed,clingen,decipher,gnomAD-preview,clingenDosageSensitivityMap,
gerpScore,dannScore,omim,clingenGeneValidity,phylopScore,lowComplexityRegion,refMinor,
heteroplasmy,Ensembl,RefSeq

Initialization Time Positions/s
---------------------------------------------------------------------------
SA Position Scan 00:00:00.3 307,966
....
..

The list of available values is compiled from the files provided (using -c and --sd options).

- - - - \ No newline at end of file diff --git a/3.21/introduction/parsing-json/index.html b/3.21/introduction/parsing-json/index.html deleted file mode 100644 index 2d997100..00000000 --- a/3.21/introduction/parsing-json/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Parsing Nirvana JSON | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Parsing Nirvana JSON

Why JSON?

VCF is a fantastic file format that was developed during the methods development activities within the 1000 Genomes Project. Prior to that, variant callers were outputting information into a variety of tab-delimited formats. Some were based on existing standards (like GFF), while most were proprietary. The primary intent of VCF files was to provide a human-readable, standardized representation of genetic variants. Similar to SAM/BAM files, VCF files used BCF files as their binary counterpart.

In the very beginning, Nirvana offered VCF output for annotation. While many variant annotators offer an option to output VCF files, one could argue if they are still human-readable. Here's an example from a VCF file produced by VEP v102:

chr3    107840527   .   A   ATTTTTTTTT,AT,ATTTTTTTT 153.51  PASS    AN=6;MQ=244.10;
SOR=1.739;QD=2.24;DP=57;AF=0.500,0.167,0.333;FS=0.000;AC=3,1,2;CSQ=TTTTTTTTT|
intron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|
Transcript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-132_622-124dup|||||||
rs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||
|||||||||0.792|-0.109757,T|intron_variant&non_coding_transcript_variant|MODIFIER|
LINC00635|ENSG00000241469|Transcript|ENST00000608506.6|lncRNA||4/4|
ENST00000608506.6:n.622-124dup|||||||rs35564779||-1||HGNC|HGNC:27184|||5|||||||||
Ensembl||||||||||||||||||||||||||||||||||||||||||||0.932|-0.075622,TTTTTTTT|
intron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|
Transcript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-131_622-124dup|||||||
rs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||
|||||||||0.808|-0.105490,TTTTTTTTT|intron_variant&non_coding_transcript_variant|
MODIFIER|LINC00636|ENSG00000240423|Transcript|ENST00000649048.1|lncRNA||2/3|
ENST00000649048.1:n.179+5223_179+5231dup|||||||rs35564779||1||HGNC|HGNC:27702|||||||||
|||Ensembl||||||||||||||||||||||||||||||||||||||||||||0.792|-0.109757, (etc.)

Originally Nirvana used the same VCF notation as VEP uses above. The problem is that you end up with a large amount of text that is difficult to parse out by eye and requires the use of several delimiters to divide the information into useful segments. When we originally annotated this variant using VEP, this single variant used 488,909 bytes (almost ½ MB). Surprisingly, we found that this broke some downstream tools that had preconceived notions of how long a single line could be in a VCF file.

caution

Whitespace is not allowed in the VCF INFO field. This means that if you wanted to express a gene description from OMIM: "HRAS PROTOONCOGENE, GTPase; HRAS", you would need to replace the spaces with something else like an underline. You would also need to hope that the VCF parser correctly handles embedded commas and semicolons in the description.

What do other annotators use?

Unfortunately, file format standardization has not made it all the way to variant annotation yet. The GA4GH Annotation group had many discussions on the topic several years ago. While a set of JSON schemas were created in that effort, there wasn't enough momentum to make this a new standard.

While there is some overlap in general file formats (JSON vs VCF vs TSV), none of those are compatible with each other. I.e. the VCF representation in VEP and snpEff is different just like the JSON schemas used by VEP, Nirvana, and GA4GH are different.

SourceFormats
VEPJSON, TSV, VCF
snpEffVCF
AnnovarTSV
NirvanaJSON
GA4GHJSON

We are interested in working together with others in the annotation space to develop a common annotation file format. Our belief is that this would accelerate methods development and benchmarking activities within annotation much in the same way the creation of SAM/BAM & VCF/BCF accelerated secondary analysis development.

What do we gain by using JSON?

  • JSON files are better at showing hierarchical and other relational data. For example when we output ClinVar data, we often want to output several overlapping RCV entries (variants coupled with a disease phenotype). In each, we would want to output a list of phenotypes, clinical significance, etc. That is difficult to accomplish in a human-readable way using VCF files (without resorting to a growing lexicon of delimiters).
  • JSON files use JavaScript data types, while VCF INFO fields don't directly have data types. Instead, external metadata located in the VCF header is required to indicate the preferred data type.
  • JSON files are more verbose. Often this is seen as a negative, but compression largely compensates for this. Given the following excerpt from the VCF example above HGNC:27184|||5|||||||||Ensembl it's not immediately obvious what the 5 refers to (without checking the VCF header for details). With JSON files, you would always see a key name associated with a value.
  • JSON files can be natively imported into different search and analytics solutions like Elasticsearch and Snowflake.
  • JSON strings do not have any limitations on the use of whitespace.

Parsing JSON

Our JSON files are organized similarly to original VCF variants:

Nirvana JSON files can get very large and sometimes we receive feedback that a bioinformatician tried to read the JSON file into Python or R resulting in a program that ran out of available RAM. This happens because those parsers try to load everything into memory all at once.

To get around those issues, we play some clever tricks with newlines that enables our users to parse our JSON files quickly and efficiently.

Organization

Our JSON file is arranged as follows:

  • the header section is located on the first line
  • each line after that corresponds to a position (same as a row in a VCF file)
    • until you reach the genes section ],"genes":[
  • each line after that corresponds to a gene
    • until you reach the end ]}

Knowing this, you can load each position line as an independent JSON object and extract the information you need.

Jupyter Notebook

To demonstrate this, we have put together a Jupyter notebook demonstrating how to do this in Python and an R version as well.

JASIX

One of the tools that we really like in the VCF ecosystem is tabix. Unfortunately, tabix only works for tab-delimited file formats. As a result, we created a similar tool for Nirvana JSON files called JASIX.

Here's an example of how you might use JASIX:

dotnet bin/Release/net6.0/Jasix.dll -i dragen.json.gz -q chr1:942450-942455
  • the -i argument specifies the Nirvana JSON path
  • the -q argument specifies a genomic range (you can use as many of these as you want)

JASIX also includes additional options for showing the Nirvana header or for extracting different sections (like the genes section).

The output from JASIX is a compliant JSON object shown in pretty-printed form:

{"positions":[
{
"chromosome": "chr1",
"position": 942451,
"refAllele": "T",
"altAlleles": [
"C"
],
"quality": 484.23,
"filters": [
"PASS"
],
"cytogeneticBand": "1p36.33",
"samples": [
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 21,
"genotypeQuality": 60,
"alleleDepths": [
0,
21
]
},
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 32,
"genotypeQuality": 93,
"alleleDepths": [
0,
32
]
},
{
"genotype": "1/1",
"variantFrequencies": [
1
],
"totalDepth": 36,
"genotypeQuality": 105,
"alleleDepths": [
0,
36
]
}
],
"variants": [
{
"vid": "1-942451-T-C",
"chromosome": "chr1",
"begin": 942451,
"end": 942451,
"refAllele": "T",
"altAllele": "C",
"variantType": "SNV",
"hgvsg": "NC_000001.11:g.942451T>C",
"phylopScore": -0.1,
"clinvar": [
{
"id": "VCV000836156.1",
"reviewStatus": "criteria provided, single submitter",
"significance": [
"uncertain significance"
],
"refAllele": "T",
"altAllele": "T",
"lastUpdatedDate": "2020-08-20"
},
{
"id": "RCV001037211.1",
"variationId": 836156,
"reviewStatus": "criteria provided, single submitter",
"alleleOrigins": [
"germline"
],
"refAllele": "T",
"altAllele": "T",
"phenotypes": [
"not provided"
],
"medGenIds": [
"CN517202"
],
"significance": [
"uncertain significance"
],
"lastUpdatedDate": "2020-08-20",
"pubMedIds": [
"28492532"
]
}
],
"dbsnp": [
"rs6672356"
],
"gnomad": {
"coverage": 25,
"allAf": 0.999855,
"allAn": 123742,
"allAc": 123724,
"allHc": 61853,
"afrAf": 0.999416,
"afrAn": 10278,
"afrAc": 10272,
"afrHc": 5133,
"amrAf": 0.99995,
"amrAn": 20008,
"amrAc": 20007,
"amrHc": 10003,
"easAf": 1,
"easAn": 6054,
"easAc": 6054,
"easHc": 3027,
"finAf": 1,
"finAn": 8696,
"finAc": 8696,
"finHc": 4348,
"nfeAf": 0.999899,
"nfeAn": 49590,
"nfeAc": 49585,
"nfeHc": 24790,
"asjAf": 1,
"asjAn": 7208,
"asjAc": 7208,
"asjHc": 3604,
"sasAf": 0.99967,
"sasAn": 18160,
"sasAc": 18154,
"sasHc": 9074,
"othAf": 1,
"othAn": 3748,
"othAc": 3748,
"othHc": 1874,
"maleAf": 0.9999,
"maleAn": 69780,
"maleAc": 69773,
"maleHc": 34883,
"femaleAf": 0.999796,
"femaleAn": 53962,
"femaleAc": 53951,
"femaleHc": 26970,
"controlsAllAf": 0.999815,
"controlsAllAn": 48654,
"controlsAllAc": 48645
},
"oneKg": {
"allAf": 1,
"afrAf": 1,
"amrAf": 1,
"easAf": 1,
"eurAf": 1,
"sasAf": 1,
"allAn": 5008,
"afrAn": 1322,
"amrAn": 694,
"easAn": 1008,
"eurAn": 1006,
"sasAn": 978,
"allAc": 5008,
"afrAc": 1322,
"amrAc": 694,
"easAc": 1008,
"eurAc": 1006,
"sasAc": 978
},
"primateAI": [
{
"hgnc": "SAMD11",
"scorePercentile": 0.87
}
],
"revel": {
"score": 0.145
},
"topmed": {
"allAf": 0.999809,
"allAn": 125568,
"allAc": 125544,
"allHc": 62760
},
"transcripts": [
{
"transcript": "ENST00000420190.6",
"source": "Ensembl",
"bioType": "protein_coding",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"downstream_gene_variant"
],
"proteinId": "ENSP00000411579.2"
},
{
"transcript": "ENST00000342066.7",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "1110",
"cdsPos": "1027",
"exons": "10/14",
"proteinPos": "343",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000342066.7:c.1027T>C",
"hgvsp": "ENSP00000342313.3:p.(Trp343Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000342313.3",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000618181.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "732",
"cdsPos": "652",
"exons": "7/11",
"proteinPos": "218",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618181.4:c.652T>C",
"hgvsp": "ENSP00000480870.1:p.(Trp218Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000480870.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000622503.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "1110",
"cdsPos": "1030",
"exons": "10/14",
"proteinPos": "344",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000622503.4:c.1030T>C",
"hgvsp": "ENSP00000482138.1:p.(Trp344Arg)",
"isCanonical": true,
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000482138.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000618323.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "cTg/cCg",
"aminoAcids": "L/P",
"cdnaPos": "712",
"cdsPos": "632",
"exons": "8/12",
"proteinPos": "211",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618323.4:c.632T>C",
"hgvsp": "ENSP00000480678.1:p.(Leu211Pro)",
"polyPhenScore": 0,
"polyPhenPrediction": "unknown",
"proteinId": "ENSP00000480678.1",
"siftScore": 0.03,
"siftPrediction": "deleterious - low confidence"
},
{
"transcript": "ENST00000616016.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "ccT/ccC",
"aminoAcids": "P",
"cdnaPos": "944",
"cdsPos": "864",
"exons": "9/13",
"proteinPos": "288",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"synonymous_variant"
],
"hgvsc": "ENST00000616016.4:c.864T>C",
"hgvsp": "ENST00000616016.4:c.864T>C(p.(Pro288=))",
"proteinId": "ENSP00000478421.1"
},
{
"transcript": "ENST00000618779.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "921",
"cdsPos": "841",
"exons": "9/13",
"proteinPos": "281",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000618779.4:c.841T>C",
"hgvsp": "ENSP00000484256.1:p.(Trp281Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000484256.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000616125.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "783",
"cdsPos": "703",
"exons": "8/12",
"proteinPos": "235",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000616125.4:c.703T>C",
"hgvsp": "ENSP00000484643.1:p.(Trp235Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000484643.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000620200.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "cTg/cCg",
"aminoAcids": "L/P",
"cdnaPos": "427",
"cdsPos": "347",
"exons": "5/9",
"proteinPos": "116",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000620200.4:c.347T>C",
"hgvsp": "ENSP00000484820.1:p.(Leu116Pro)",
"polyPhenScore": 0,
"polyPhenPrediction": "unknown",
"proteinId": "ENSP00000484820.1",
"siftScore": 0.16,
"siftPrediction": "tolerated - low confidence"
},
{
"transcript": "ENST00000617307.4",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "867",
"cdsPos": "787",
"exons": "9/13",
"proteinPos": "263",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000617307.4:c.787T>C",
"hgvsp": "ENSP00000482090.1:p.(Trp263Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000482090.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "NM_152486.2",
"source": "RefSeq",
"bioType": "protein_coding",
"codons": "Cgg/Cgg",
"aminoAcids": "R",
"cdnaPos": "1107",
"cdsPos": "1027",
"exons": "10/14",
"proteinPos": "343",
"geneId": "148398",
"hgnc": "SAMD11",
"consequence": [
"synonymous_variant"
],
"hgvsc": "NM_152486.2:c.1027T>C",
"hgvsp": "NM_152486.2:c.1027T>C(p.(Arg343=))",
"isCanonical": true,
"proteinId": "NP_689699.2"
},
{
"transcript": "ENST00000341065.8",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "750",
"cdsPos": "751",
"exons": "8/12",
"proteinPos": "251",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000341065.8:c.750T>C",
"hgvsp": "ENSP00000349216.4:p.(Trp251Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000349216.4",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000455979.1",
"source": "Ensembl",
"bioType": "protein_coding",
"codons": "Tgg/Cgg",
"aminoAcids": "W/R",
"cdnaPos": "507",
"cdsPos": "508",
"exons": "4/7",
"proteinPos": "170",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"missense_variant"
],
"hgvsc": "ENST00000455979.1:c.507T>C",
"hgvsp": "ENSP00000412228.1:p.(Trp170Arg)",
"polyPhenScore": 0,
"polyPhenPrediction": "benign",
"proteinId": "ENSP00000412228.1",
"siftScore": 1,
"siftPrediction": "tolerated"
},
{
"transcript": "ENST00000478729.1",
"source": "Ensembl",
"bioType": "processed_transcript",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000474461.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "389",
"exons": "3/4",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000474461.1:n.389T>C"
},
{
"transcript": "ENST00000466827.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "191",
"exons": "2/2",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000466827.1:n.191T>C"
},
{
"transcript": "ENST00000464948.1",
"source": "Ensembl",
"bioType": "retained_intron",
"cdnaPos": "286",
"exons": "1/2",
"geneId": "ENSG00000187634",
"hgnc": "SAMD11",
"consequence": [
"non_coding_transcript_exon_variant"
],
"hgvsc": "ENST00000464948.1:n.286T>C"
},
{
"transcript": "NM_015658.3",
"source": "RefSeq",
"bioType": "protein_coding",
"geneId": "26155",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
],
"isCanonical": true,
"proteinId": "NP_056473.2"
},
{
"transcript": "ENST00000483767.5",
"source": "Ensembl",
"bioType": "retained_intron",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000327044.6",
"source": "Ensembl",
"bioType": "protein_coding",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
],
"isCanonical": true,
"proteinId": "ENSP00000317992.6"
},
{
"transcript": "ENST00000477976.5",
"source": "Ensembl",
"bioType": "retained_intron",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
},
{
"transcript": "ENST00000496938.1",
"source": "Ensembl",
"bioType": "processed_transcript",
"geneId": "ENSG00000188976",
"hgnc": "NOC2L",
"consequence": [
"downstream_gene_variant"
]
}
]
}
]
}
]}
- - - - \ No newline at end of file diff --git a/3.21/utilities/jasix/index.html b/3.21/utilities/jasix/index.html deleted file mode 100644 index 22139e68..00000000 --- a/3.21/utilities/jasix/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -Jasix | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

Jasix

Overview

The Jasix index aims to provide TABIX-like indexing capabilities for the Nirvana JSON output.

Creating the Jasix index

The Jasix index (which comes in a .jsi file) is generated on the fly alongside the Nirvana output. It can also be generated independently by running the Jasix command line utility on the JSON output file. Please note that the Jasix utility can only consume JSON files that follow the Nirvana JSON output format. The following code blocks demonstrate the help menu and the index-generating functionality of Jasix.

Example

dotnet Jasix.dll -h
USAGE: dotnet Jasix.dll -i in.json.gz [options]
Indexes a Nirvana annotated JSON file

OPTIONS:
--header, -t print also the header lines
--only-header, -H print only the header lines
--chromosomes, -l list chromosome names
--index, -c create index
--in, -i <VALUE> input
--out, -o <VALUE> compressed output file name (default:console)
--query, -q <VALUE> query range
--section, -s <VALUE> complete section (positions or genes) to output
--help, -h displays the help menu
--version, -v displays the version
dotnet Jasix.dll --index -i input.json.gz
---------------------------------------------------------------------------
Jasix (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0
---------------------------------------------------------------------------

Ref Sequence chrM indexed in 00:00:00.2
Ref Sequence chr1 indexed in 00:00:05.8
Ref Sequence chr2 indexed in 00:00:06.0
.
.
.
Peak memory usage: 28.5 MB
Time: 00:01:14.8

Querying the index

The Jasix query format is chr:start-end. If end is not provided, Jasix assumes end=start. If only chr is provided, all entries for that chromosome will be returned.

dotnet Jasix.dll -i input.json.gz -q chrM:5000-7000
{
"positions":[
{
"chromosome":"chrM",
"refAllele":"C",
"position":5581,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"T"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1625,
"genotypeQuality":1,
"alleleDepths":[
0,
1625
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"T",
"refAllele":"C",
"begin":5581,
"chromosome":"chrM",
"end":5581,
"variantType":"SNV",
"vid":"MT:5581:T"
}
]
},
{
"chromosome":"chrM",
"refAllele":"A",
"position":6267,
"quality":1637.00,
"filters":[
"LowGQXHetSNP"
],
"altAlleles":[
"G"
],
"samples":[
{
"variantFreq":0.6873,
"totalDepth":323,
"genotypeQuality":1,
"alleleDepths":[
101,
222
],
"genotype":"0/1"
}
],
"variants":[
{
"altAllele":"G",
"refAllele":"A",
"begin":6267,
"chromosome":"chrM",
"end":6267,
"variantType":"SNV",
"vid":"MT:6267:G"
}
]
}
]
}

The default output stream is the console. However, if an output filename is provided, Jasix writes the results to that file in a bgzip-compressed format. The output is always a valid JSON entry. If requested (via the -t option), the header of the indexed file will be included. Multiple queries can be submitted in the same command, and the output will contain them within the same "positions" block in the order of the submitted queries (Warning: if the queries are out of order or overlapping, the output will be out of order and intersecting).

dotnet Jasix.dll -i input.json.gz  -q chrM:5000-7000 -q chrM:8500-9500 -t
{
"header":{
"annotator":"Illumina Annotation Engine 1.6.2.0",
"creationTime":"2017-08-30 11:42:57",
"genomeAssembly":"GRCh37",
"schemaVersion":6,
"dataVersion":"84.24.39",
"dataSources":[
{
"name":"VEP",
"version":"84",
"description":"Ensembl",
"releaseDate":"2017-01-16"
}
],
"samples":[
"Mother"
]
},
"positions":[
{
"chromosome":"chrM",
"refAllele":"C",
"position":5581,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"T"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1625,
"genotypeQuality":1,
"alleleDepths":[
0,
1625
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"T",
"refAllele":"C",
"begin":5581,
"chromosome":"chrM",
"end":5581,
"variantType":"SNV",
"vid":"MT:5581:T"
}
]
},
{
"chromosome":"chrM",
"refAllele":"A",
"position":6267,
"quality":1637.00,
"filters":[
"LowGQXHetSNP"
],
"altAlleles":[
"G"
],
"samples":[
{
"variantFreq":0.6873,
"totalDepth":323,
"genotypeQuality":1,
"alleleDepths":[
101,
222
],
"genotype":"0/1"
}
],
"variants":[
{
"altAllele":"G",
"refAllele":"A",
"begin":6267,
"chromosome":"chrM",
"end":6267,
"variantType":"SNV",
"vid":"MT:6267:G"
}
]
},
{
"chromosome":"chrM",
"refAllele":"G",
"position":8702,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"A"
],
"samples":[
{
"variantFreq":0.9987,
"totalDepth":1534,
"genotypeQuality":1,
"alleleDepths":[
2,
1532
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"A",
"refAllele":"G",
"begin":8702,
"chromosome":"chrM",
"end":8702,
"variantType":"SNV",
"vid":"MT:8702:A"
}
]
},
{
"chromosome":"chrM",
"refAllele":"G",
"position":9378,
"quality":3070.00,
"filters":[
"LowGQXHomSNP"
],
"altAlleles":[
"A"
],
"samples":[
{
"variantFreq":1,
"totalDepth":1018,
"genotypeQuality":1,
"alleleDepths":[
0,
1018
],
"genotype":"1/1"
}
],
"variants":[
{
"altAllele":"A",
"refAllele":"G",
"begin":9378,
"chromosome":"chrM",
"end":9378,
"variantType":"SNV",
"vid":"MT:9378:A"
}
]
}
]
}

Extracting a section

The Nirvana JSON file has three sections: header, positions and genes. Header can be printed using the -H option. If you are interested in only the positions or genes section, you can use the -s or --section option.

dotnet Jasix.dll -i input.json.gz  -s genes
[
{
"name": "ABCB10",
"omim": [
{
"mimNumber": 605454,
"geneName": "ATP-binding cassette, subfamily B, member 10"
}
]
},
{
"name": "ABCD3",
"omim": [
{
"mimNumber": 170995,
"geneName": "ATP-binding cassette, subfamily D, member 3 (peroxisomal membrane protein 1, 70kD)",
"description": "The ABCD3 gene encodes a peroxisomal membrane transporter involved in the transport of branched-chain fatty acids and C27 bile acids into the peroxisome; the latter function is a crucial step in bile acid biosynthesis (summary by Ferdinandusse et al., 2015).",
"phenotypes": [
{
"mimNumber": 616278,
"phenotype": "?Bile acid synthesis defect, congenital, 5",
"mapping": "molecular basis of the disorder is known",
"inheritances": [
"Autosomal recessive"
],
"comments": [
"unconfirmed or possibly spurious mapping"
]
}
]
}
]
}
]
- - - - \ No newline at end of file diff --git a/3.21/utilities/sautils/index.html b/3.21/utilities/sautils/index.html deleted file mode 100644 index da3c8231..00000000 --- a/3.21/utilities/sautils/index.html +++ /dev/null @@ -1,18 +0,0 @@ - - - - - - - -SAUtils | IlluminaConnectedAnnotations - - - - -
-
Skip to main content
Version: 3.21

SAUtils

Overview

SAUtils is a utility tool that creates binary supplementary annotation files (.nsa, .gsa, .npd, .nsi, etc.) from original data files (e.g. VCFs, TSVs, XML, HTML, etc.) for various data sources (e.g. ClinVar, dbSNP, gnomAD, etc.). These binary files can be fed into the Nirvana Annotation engine to provide supplementary annotations in the output.

The SAUtils Menu

SAUtils supports building binary files for many data sources. The help menu lists them out in the form of sub-commands.

dotnet Nirvana/bin/Release/net6.0/SAUtils.dll
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0
---------------------------------------------------------------------------

Utilities focused on supplementary annotation

USAGE: dotnet SAUtils.dll <command> [options]

COMMAND: AutoDownloadGenerate auto download and generate Omim, Clinvar, Clingen
AaCon create AA conservation database
ancestralAllele create Ancestral allele database from 1000Genomes data
ClinGen create ClinGen database
Downloader download ClinGen database
clinvar create ClinVar database
concat merge multiple NSA files for the same data source having non-overlapping regions
Cosmic create COSMIC database
CosmicSv create COSMIC SV database
CosmicFusion create COSMIC gene fusion database
CosmicCGC create COSMIC cancer gene census database
CustomGene create custom gene annotation database
CustomVar create custom variant annotation database
Dann create DANN database
Dbsnp create dbSNP database
Dgv create DGV database
DiseaseValidity create disease validity database
DosageMapRegions create dosage map regions
DosageSensitivity create dosage sensitivity database
DownloadOmim download OMIM database
ExtractMiniSA extracts mini SA
ExtractMiniXml extracts mini XML (ClinVar)
FilterSpliceNetTsv filter SpliceNet predictions
FusionCatcher create FusionCatcher database
Gerp create GERP conservation database
GlobalMinor create global minor allele database
Gnomad create gnomAD database
Gnomad-lcr create gnomAD low complexity region database
GnomadGeneScores create gnomAD gene scores database
GnomadSV create gnomAD structural variant database
Index edit an index file
MitoHet create mitochondrial Heteroplasmy database
MitomapSvDb create MITOMAP structural variants database
MitomapVarDb create MITOMAP small variants database
Omim create OMIM database
OneKGen create 1000 Genome small variants database
OneKGenSv create 1000 Genomes structural variants database
OneKGenSvVcfToBed convert 1000 Genomes structural variants VCF file into a BED-like file
PhyloP create PhyloP database
PrimateAi create PrimateAI database
RefMinor create Reference Minor database from 1000 Genome
RemapWithDbsnp remap a VCF file given source and destination rsID mappings
Revel create REVEL database
SpliceAi create SpliceAI database
TopMed create TOPMed database
Gme create GME Variome database
Decipher create Decipher database

You can get further detailed help for each sub-command by typing in the subcommand. For example:

dotnet Nirvana/bin/Release/net6.0/SAUtils.dll clinvar
---------------------------------------------------------------------------
SAUtils (c) 2023 Illumina, Inc.
Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0
---------------------------------------------------------------------------

USAGE: dotnet SAUtils.dll clinvar [options]
Creates a supplementary database with ClinVar annotations

OPTIONS:
--ref, -r <VALUE> compressed reference sequence file
--rcv, -i <VALUE> ClinVar Full release XML file
--vcv, -c <VALUE> ClinVar Variation release XML file
--out, -o <VALUE> output directory
--help, -h displays the help menu
--version, -v displays the version

More detailed instructions about each sub-command can be found in the documentation of the respective data sources.

Output File Formats

The format of the binary files that SAUtils produces depends on the type of annotation data represented in each file (e.g. small variants vs. structural variants vs. genes).

File Extension | Description
.nsa | Small variant annotations (e.g. SNV, insertions, deletions, etc.)
.gsa | Compact variant annotations (e.g. SNV, insertions, deletions, etc.)
.idx | Index file
.nsi | Interval annotations (e.g. SV, CNVs, intervals)
.nga | Gene annotations
.npd | Conservation scores
.rma | Reference Minor allele
.gfs | Gene fusions source
.gfj | Gene fusions JSON
.schema | JSON schema
- - - - \ No newline at end of file diff --git a/404.html b/404.html index f6940874..3e0b26b0 100644 --- a/404.html +++ b/404.html @@ -6,13 +6,13 @@ Page Not Found | IlluminaConnectedAnnotations - - + +
-
Skip to main content

Page Not Found

We could not find what you were looking for.

Please contact the owner of the site that linked you to the original URL and let them know their link is broken.

- - +
Skip to main content

Page Not Found

We could not find what you were looking for.

Please contact the owner of the site that linked you to the original URL and let them know their link is broken.

+ + \ No newline at end of file diff --git a/assets/files/TestNirvana-393f155ae4157b0ffbd1b7e399348477.sh b/assets/files/TestNirvana-393f155ae4157b0ffbd1b7e399348477.sh deleted file mode 100644 index 1b915bfc..00000000 --- a/assets/files/TestNirvana-393f155ae4157b0ffbd1b7e399348477.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash - -# The first argument is the path to the release zip file. -# adjust these paths to reflect where you have downloaded the Nirvana data files -# In this example, we assume that the Cache, References, and SupplementaryDatabase -# folders have been downloaded into the NIRVANA_ROOT folder. - -# In addition to downloading the Nirvana data files, make sure you have .NET 6.0 -# installed on your computer: -# https://www.microsoft.com/net/download/core - - -NIRVANA_BUILD_ZIP=$1 -NIRVANA_ROOT=~/NirvanaTest -NIRVANA_BUILD_DIR=$NIRVANA_ROOT/build -NIRVANA_BIN=$NIRVANA_BUILD_DIR/Nirvana.dll -DOWNLOADER_BIN=$NIRVANA_BUILD_DIR/Downloader.dll -DATA_DIR=$NIRVANA_ROOT/Data - -VCF_PATH=HiSeq.10000.vcf.gz - -# just change this to GRCh38 if you want to set everything up for hg38 -GENOME_ASSEMBLY=GRCh37 - -SA_DIR=$DATA_DIR/SupplementaryAnnotation/$GENOME_ASSEMBLY -REF_DIR=$DATA_DIR/References -CACHE_DIR=$DATA_DIR/Cache -REF_TEST=$REF_DIR/Homo_sapiens.${GENOME_ASSEMBLY}.Nirvana.dat - -########## Help function ############# -PrintHelp(){ - echo "USAGE: ./TestNirvana.sh /path/to/build/Nirvana.zip" -} -############ Checking arguments ######## -if [ "$#" -neq 1 ] ; then - PrintHelp - exit -fi -####################################### - - -# ===================================================================== - -YELLOW='\033[1;33m' -RESET='\033[0m' - -echo -ne $YELLOW -echo " _ _ _ " -echo "| \ | (_) " -echo "| \| |_ _ ____ ____ _ _ __ __ _ " -echo "| . 
\` | | '__\ \ / / _\` | '_ \ / _\` |" -echo "| |\ | | | \ V / (_| | | | | (_| |" -echo "|_| \_|_|_| \_/ \__,_|_| |_|\__,_|" -echo -e $RESET - -# create the data directories -create_dir() { - echo "making directory $1" - if [ ! -d $1 ] - then - mkdir -p $1 - fi -} - -# silence pushd and popd -pushd () { - command pushd "$@" > /dev/null -} - -popd () { - command popd "$@" > /dev/null -} - -# ============================== -# unzip the build -# ============================== -create_dir $NIRVANA_BUILD_DIR -cd $NIRVANA_BUILD_DIR -unzip $NIRVANA_BUILD_ZIP - -# ============================== -# download all of the data files -# ============================== -echo "Download all data files" -create_dir $DATA_DIR -dotnet $DOWNLOADER_BIN --ga $GENOME_ASSEMBLY --out $DATA_DIR - -# ============================== -# run Nirvana on a test VCF file -# ============================== -echo "run Nirvana on a test VCF file" -if [ ! -f $VCF_PATH ] -then - curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz -fi - -# analyze it with Nirvana -dotnet $NIRVANA_BIN -c $CACHE_DIR --sd $SA_DIR -r $REF_TEST -i $VCF_PATH -o HiSeq.10000 - -popd -popd diff --git a/assets/images/BCFtools-csq-fig1a-a266b0be1c6d74f085fcacb2f433f750.png b/assets/images/BCFtools-csq-fig1a-a266b0be1c6d74f085fcacb2f433f750.png deleted file mode 100644 index 3b0dbc28..00000000 Binary files a/assets/images/BCFtools-csq-fig1a-a266b0be1c6d74f085fcacb2f433f750.png and /dev/null differ diff --git a/assets/images/SupplementaryAnnotations-d43d3f1c837f9b80fab530432e0e4b1d.svg b/assets/images/SupplementaryAnnotations-d43d3f1c837f9b80fab530432e0e4b1d.svg deleted file mode 100644 index a4e2cbe5..00000000 --- a/assets/images/SupplementaryAnnotations-d43d3f1c837f9b80fab530432e0e4b1d.svg +++ /dev/null @@ -1,9057 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/assets/images/multiple-reading-frames-19e896fe74a8781afdd1fa2539edff88.png b/assets/images/multiple-reading-frames-19e896fe74a8781afdd1fa2539edff88.png deleted file mode 100644 index e44de483..00000000 Binary files a/assets/images/multiple-reading-frames-19e896fe74a8781afdd1fa2539edff88.png and /dev/null differ diff --git a/assets/images/no-recomposition-b63eb855b0ed62b8ae331eafc538223d.png b/assets/images/no-recomposition-b63eb855b0ed62b8ae331eafc538223d.png deleted file mode 100644 index b9a3c4ec..00000000 Binary files a/assets/images/no-recomposition-b63eb855b0ed62b8ae331eafc538223d.png and /dev/null differ diff --git a/assets/images/three-SNVs-larger-separation-85b12d5bafd32ee312103a1b9b588720.png b/assets/images/three-SNVs-larger-separation-85b12d5bafd32ee312103a1b9b588720.png deleted file mode 100644 index ea2e031e..00000000 Binary files a/assets/images/three-SNVs-larger-separation-85b12d5bafd32ee312103a1b9b588720.png and /dev/null differ diff --git a/assets/images/three-SNVs-two-codons-bc45a465809b53d51dbfb32deaa6324a.png 
b/assets/images/three-SNVs-two-codons-bc45a465809b53d51dbfb32deaa6324a.png deleted file mode 100644 index 1f05bb40..00000000 Binary files a/assets/images/three-SNVs-two-codons-bc45a465809b53d51dbfb32deaa6324a.png and /dev/null differ diff --git a/assets/js/003c43c2.abc7a408.js b/assets/js/003c43c2.abc7a408.js deleted file mode 100644 index f3b8a1da..00000000 --- a/assets/js/003c43c2.abc7a408.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3332,5330],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=d(n),u=r,v=p["".concat(s,".").concat(u)]||p[u]||m[u]||i;return n?a.createElement(v,o(o({ref:t},c),{},{components:n})):a.createElement(v,o({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof 
e?e:r,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/revel-json",id:"version-3.18/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/revel-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],d={toc:s},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}p.isMDXComponent=!0},35237:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),i=n(30935);const o={title:"REVEL"},l=void 
0,s={unversionedId:"data-sources/revel",id:"version-3.18/data-sources/revel",title:"REVEL",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/revel.mdx",sourceDirName:"data-sources",slug:"/data-sources/revel",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/revel",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/revel.mdx",tags:[],version:"3.18",frontMatter:{title:"REVEL"},sidebar:"docs",previous:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/primate-ai"},next:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/splice-ai"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"CSV File",id:"csv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 
16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. ",(0,r.kt)("em",{parentName:"p"},"The American Journal of Human Genetics")," ",(0,r.kt)("strong",{parentName:"p"},"99"),", 877-885 (2016). ",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1016/j.ajhg.2016.08.016"},"https://doi.org/10.1016/j.ajhg.2016.08.016")))),(0,r.kt)("h2",{id:"csv-file"},"CSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL\n1,35142,35142,G,A,T,M,0.027\n1,35142,35142,G,C,T,R,0.035\n1,35142,35142,G,T,T,K,0.043\n1,35143,35143,T,A,T,S,0.018\n1,35143,35143,T,C,T,A,0.034\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"hg19_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"grch38_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"REVEL"))),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Since the input file contains positions for both GRCh37 and GRCh38, we split it into two ",(0,r.kt)("strong",{parentName:"p"},"TSV")," files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file."))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Conflicting Scores")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When there are multiple scores available for the same variant (i.e. 
the same position with the same alternative allele), we pick the highest score."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sites.google.com/site/revelgenomics/downloads"},"https://sites.google.com/site/revelgenomics/downloads")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/00a416c2.77427015.js b/assets/js/00a416c2.77427015.js deleted file mode 100644 index aa68579f..00000000 --- a/assets/js/00a416c2.77427015.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8680],{3905:(t,e,n)=>{n.d(e,{Zo:()=>p,kt:()=>f});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var m=a.createContext({}),c=function(t){var e=a.useContext(m),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},p=function(t){var e=c(t.components);return a.createElement(m.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,i=t.originalType,m=t.parentName,p=l(t,["components","mdxType","originalType","parentName"]),s=c(n),u=r,f=s["".concat(m,".").concat(u)]||s[u]||d[u]||i;return n?a.createElement(f,o(o({ref:e},p),{},{components:n})):a.createElement(f,o({ref:e},p))}));function 
f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var m in e)hasOwnProperty.call(e,m)&&(l[m]=e[m]);l.originalType=t,l[s]="string"==typeof t?t:r,o[1]=l;for(var c=2;c{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>i,metadata:()=>l,toc:()=>m});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/cosmic-json",id:"version-3.21/data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/cosmic-json.md",tags:[],version:"3.21",frontMatter:{}},m=[],c={toc:m},p="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'{\n "id":"COSV58272668",\n "numSamples":8,\n "refAllele":"-",\n "altAllele":"CCT",\n "histologies":[\n {\n "name":"carcinoma (serous carcinoma)",\n "numSamples":2\n },\n {\n "name":"meningioma (fibroblastic)",\n "numSamples":1\n },\n {\n "name":"carcinoma",\n "numSamples":1\n },\n {\n "name":"carcinoma (squamous cell carcinoma)",\n "numSamples":1\n },\n {\n "name":"meningioma (transitional)",\n "numSamples":1\n },\n {\n "name":"carcinoma (adenocarcinoma)",\n "numSamples":1\n },\n {\n "name":"other (neoplasm)",\n "numSamples":1\n }\n ],\n "sites":[\n {\n "name":"ovary",\n "numSamples":2\n },\n {\n "name":"meninges",\n "numSamples":2\n },\n {\n "name":"thyroid",\n "numSamples":2\n },\n {\n "name":"cervix",\n "numSamples":1\n },\n {\n "name":"large intestine (colon)",\n "numSamples":1\n }\n ],\n "pubMedIds":[\n 25738363,\n 27548314\n ],\n 
"confirmedSomatic":true,\n "drugResistance":true, /* not in this particular COSMIC variant */\n "isAlleleSpecific":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"COSMIC Genomic Mutation ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,r.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"confirmedSomatic"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a confirmed somatic variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"drugResistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant has been associated with drug resistance")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Count")),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"description")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/01904154.55cb202d.js b/assets/js/01904154.55cb202d.js deleted file mode 100644 index 1bef1986..00000000 --- a/assets/js/01904154.55cb202d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6974],{3905:(t,e,r)=>{r.d(e,{Zo:()=>m,kt:()=>f});var n=r(67294);function a(t,e,r){return e in t?Object.defineProperty(t,e,{value:r,enumerable:!0,configurable:!0,writable:!0}):t[e]=r,t}function o(t,e){var r=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return 
Object.getOwnPropertyDescriptor(t,e).enumerable}))),r.push.apply(r,n)}return r}function i(t){for(var e=1;e=0||(a[r]=t[r]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,r)&&(a[r]=t[r])}return a}var p=n.createContext({}),c=function(t){var e=n.useContext(p),r=e;return t&&(r="function"==typeof t?t(e):i(i({},e),t)),r},m=function(t){var e=c(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},d=n.forwardRef((function(t,e){var r=t.components,a=t.mdxType,o=t.originalType,p=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),s=c(r),d=a,f=s["".concat(p,".").concat(d)]||s[d]||u[d]||o;return r?n.createElement(f,i(i({ref:e},m),{},{components:r})):n.createElement(f,i({ref:e},m))}));function f(t,e){var r=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=r.length,i=new Array(o);i[0]=d;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[s]="string"==typeof t?t:a,i[1]=l;for(var c=2;c{r.r(e),r.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>o,metadata:()=>l,toc:()=>p});var n=r(87462),a=(r(67294),r(3905));const o={},i=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.16/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.16/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],c={toc:p},m="wrapper";function s(t){let{components:e,...r}=t;return(0,a.kt)(m,(0,n.Z)({},c,r,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"end"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0235443a.5358ad09.js b/assets/js/0235443a.5358ad09.js deleted file mode 100644 index b2b80f13..00000000 --- a/assets/js/0235443a.5358ad09.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4461,986,5200],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>h});var n=a(67294);function i(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(i[a]=e[a]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(i[a]=e[a])}return i}var s=n.createContext({}),m=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},d=function(e){var t=m(e.components);return n.createElement(s.Provider,{value:t},e.children)},p="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var 
a=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),p=m(a),u=i,h=p["".concat(s,".").concat(u)]||p[u]||c[u]||r;return a?n.createElement(h,o(o({ref:t},d),{},{components:a})):n.createElement(h,o({ref:t},d))}));function h(e,t){var a=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=a.length,o=new Array(r);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:i,o[1]=l;for(var m=2;m{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.21/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"status"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}p.isMDXComponent=!0},96392:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.21/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n 
"annotationOverlap":0.42405\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"end"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}p.isMDXComponent=!0},86314:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>l,metadata:()=>m,toc:()=>d});var n=a(87462),i=(a(67294),a(3905)),r=a(16006),o=a(96392);const l={title:"MITOMAP"},s=void 0,m={unversionedId:"data-sources/mitomap",id:"version-3.21/data-sources/mitomap",title:"MITOMAP",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/mitomap.mdx",sourceDirName:"data-sources",slug:"/data-sources/mitomap",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mitomap",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/mitomap.mdx",tags:[],version:"3.21",frontMatter:{title:"MITOMAP"},sidebar:"docs",previous:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mito-heteroplasmy"},next:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/omim"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Scraping HTML Pages",id:"scraping-html-pages",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Allele Parsing",id:"allele-parsing",children:[],level:4}],level:3}],level:2},{value:"PostgreSQL Dump File",id:"postgresql-dump-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[{value:"Small Variants",id:"small-variants",children:[],level:3},{value:"Structural Variants",id:"structural-variants",children:[],level:3}],level:2}],p={toc:d},c="wrapper";function 
u(e){let{components:t,...l}=e;return(0,i.kt)(c,(0,n.Z)({},p,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"MITOMAP provides a compendium of polymorphisms and mutations in human mitochondrial DNA."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. ",(0,i.kt)("em",{parentName:"p"},"Current Protocols in Bioinformatics")," 1(123):1.23.1-26 (2013). ",(0,i.kt)("a",{parentName:"p",href:"http://www.mitomap.org"},"http://www.mitomap.org")))),(0,i.kt)("h2",{id:"scraping-html-pages"},"Scraping HTML Pages"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"MITOMAP is unique in that it doesn't offer the data in a downloadable format. 
As a result, the annotation content in Nirvana is scraped from the following MITOMAP pages:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsControl"},"mtDNA Control Region Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsCoding"},"mtDNA Coding Region & RNA Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsRNA"},"Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsCodingControl"},"Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/DeletionsSingle"},"Reported mtDNA Deletions")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/InsertionsSimple"},"mtDNA Simple Insertions"))),(0,i.kt)("p",null,(0,i.kt)("img",{src:a(9798).Z})),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"Here's what the HTML code looks like:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-html"},"[\"582\",\"MT-TF\",\"Mitochondrial myopathy\",\"T582C\",\"tRNA Phe\",\"-\",\"+\",\"Reported\",\"72.90% \",\"0\",\"2\"],\n[\"583\",\"MT-TF\",\"MELAS / MM & EXIT\",\"G583A\",\"tRNA Phe\",\"-\",\"+\",\"Cfrm\",\"93.10% \",\"0\",\"3\"],\n")),(0,i.kt)("p",null,"We're mainly interested in the following columns (numbers indicate the HTML page 
above):"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Position",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Disease",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Nucleotide Change",(0,i.kt)("sup",null,"1,2")),(0,i.kt)("li",{parentName:"ul"},"Allele",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Homoplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Heteroplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Status",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"MitoTIP",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"GB Seqs FL(CR)",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Deletion Junction",(0,i.kt)("sup",null,"5")),(0,i.kt)("li",{parentName:"ul"},"Insert (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"Insert Point (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"References/Curated References",(0,i.kt)("sup",null,"1,2,3,4"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"MitoTIP")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The MitoTIP information is used to populate the ",(0,i.kt)("inlineCode",{parentName:"p"},"clinicalSignificance")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"scorePercentile"),' JSON keys. 
The "frequency alert" entries are skipped since it\'s not directly relevant to clinical significance.'))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Left alignment")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Variant Enumeration")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are ",(0,i.kt)("inlineCode",{parentName:"p"},"C-C(2-8)")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"A-AC or ACC"),". 
Alternate alleles containing IUPAC ambiguity codes are similarly enumerated."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Inversions")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"MITOMAP inversions are currently treated as MNVs."))),(0,i.kt)("h4",{id:"allele-parsing"},"Allele Parsing"),(0,i.kt)("p",null,"The following MITOMAP allele parsing conventions are supported:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"C123T"),(0,i.kt)("li",{parentName:"ul"},"16021_16022del"),(0,i.kt)("li",{parentName:"ul"},"8042del2"),(0,i.kt)("li",{parentName:"ul"},"C9537insC"),(0,i.kt)("li",{parentName:"ul"},"3902_3908invACCTTGC"),(0,i.kt)("li",{parentName:"ul"},"A-AC or ACC"),(0,i.kt)("li",{parentName:"ul"},"C-C(2-8)"),(0,i.kt)("li",{parentName:"ul"},"8042delAT")),(0,i.kt)("h2",{id:"postgresql-dump-file"},"PostgreSQL Dump File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;\n1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . 
Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177\n2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. 
The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534\n")),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"id"),(0,i.kt)("li",{parentName:"ul"},"nlmid")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Why not use the PostgreSQL file for everything?")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in."))),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 
1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Duplicated records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown."),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"For diseases and PubMed IDs, we take the union of the values in the duplicated records."),(0,i.kt)("li",{parentName:"ul"},"For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.")))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Skipped records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Records that represent an alternate notation of the original variant are skipped. 
Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped."))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"see ",(0,i.kt)("a",{parentName:"li",href:"#example"},"HTML Pages")," above"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/downloads/mitomap.dump.sql.gz"},"PostgreSQL dump file"))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("h3",{id:"small-variants"},"Small Variants"),(0,i.kt)(r.default,{mdxType:"SmallJSON"}),(0,i.kt)("h3",{id:"structural-variants"},"Structural Variants"),(0,i.kt)(o.default,{mdxType:"SVJSON"}))}u.isMDXComponent=!0},9798:(e,t,a)=>{a.d(t,{Z:()=>n});const n=a.p+"assets/images/MITOMAP-d8d4dd35c2336fdba5fcced77ec438e6.png"}}]); \ No newline at end of file diff --git a/assets/js/02b71e27.eb75fe78.js b/assets/js/02b71e27.eb75fe78.js deleted file mode 100644 index cf30866d..00000000 --- a/assets/js/02b71e27.eb75fe78.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2439],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=u(t.components);return 
a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},c=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),s=u(n),c=r,g=s["".concat(p,".").concat(c)]||s[c]||d[c]||l;return n?a.createElement(g,o(o({ref:e},m),{},{components:n})):a.createElement(g,o({ref:e},m))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.16/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],u={toc:p},m="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 
0.5797,\n "reciprocalOverlap": 0.07555\n }\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/03b0fa1d.522d42d7.js b/assets/js/03b0fa1d.522d42d7.js deleted file mode 100644 index 40304710..00000000 --- a/assets/js/03b0fa1d.522d42d7.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7185],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>N});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},k=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(n),k=r,N=d["".concat(p,".").concat(k)]||d[k]||g[k]||l;return n?a.createElement(N,o(o({ref:e},m),{},{components:n})):a.createElement(N,o({ref:e},m))}));function N(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=k;var i={};for(var p in 
e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[d]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/gnomad-structural-variants-json",id:"version-3.18/data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad-structural-variants-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],u={toc:p},m="wrapper";function d(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": [\n {\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n "variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n "afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n 
}\n]\n\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"chromosome number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"structural variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"gnomAD ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,r.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,r.kt)("tr",{parentNa
me:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/03f4f7a2.5b8d1654.js b/assets/js/03f4f7a2.5b8d1654.js deleted file mode 100644 index a894b85d..00000000 --- a/assets/js/03f4f7a2.5b8d1654.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5330],{3905:(e,t,r)=>{r.d(t,{Zo:()=>p,kt:()=>f});var n=r(67294);function a(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function o(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function c(e){for(var t=1;t=0||(a[r]=e[r]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(a[r]=e[r])}return a}var i=n.createContext({}),s=function(e){var t=n.useContext(i),r=t;return e&&(r="function"==typeof e?e(t):c(c({},t),e)),r},p=function(e){var t=s(e.components);return n.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var 
r=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=s(r),d=a,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||o;return r?n.createElement(f,c(c({ref:t},p),{},{components:r})):n.createElement(f,c({ref:t},p))}));function f(e,t){var r=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=r.length,c=new Array(o);c[0]=d;var l={};for(var i in t)hasOwnProperty.call(t,i)&&(l[i]=t[i]);l.originalType=e,l[u]="string"==typeof e?e:a,c[1]=l;for(var s=2;s{r.r(t),r.d(t,{contentTitle:()=>c,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>i});var n=r(87462),a=(r(67294),r(3905));const o={},c=void 0,l={unversionedId:"data-sources/revel-json",id:"version-3.18/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/revel-json.md",tags:[],version:"3.18",frontMatter:{}},i=[],s={toc:i},p="wrapper";function u(e){let{components:t,...r}=e;return(0,a.kt)(p,(0,n.Z)({},s,r,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"score"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No 
newline at end of file diff --git a/assets/js/03fecf9a.be385e37.js b/assets/js/03fecf9a.be385e37.js deleted file mode 100644 index b319117f..00000000 --- a/assets/js/03fecf9a.be385e37.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5606],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>m});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),d=c(n),g=i,m=d["".concat(s,".").concat(g)]||d[g]||u[g]||r;return n?a.createElement(m,l(l({ref:t},p),{},{components:n})):a.createElement(m,l({ref:t},p))}));function m(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=g;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[d]="string"==typeof e?e:i,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 
0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.18/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clingen-dosage-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],c={toc:s},p="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object 
array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0509e6a2.8e530be0.js b/assets/js/0509e6a2.8e530be0.js deleted file mode 100644 index c43d8d2b..00000000 --- a/assets/js/0509e6a2.8e530be0.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8943],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>b});var n=a(67294);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function l(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),c=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},d=function(e){var t=c(e.components);return 
n.createElement(s.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},p=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),u=c(a),p=r,b=u["".concat(s,".").concat(p)]||u[p]||m[p]||i;return a?n.createElement(b,l(l({ref:t},d),{},{components:a})):n.createElement(b,l({ref:t},d))}));function b(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=a.length,l=new Array(i);l[0]=p;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[u]="string"==typeof e?e:r,l[1]=o;for(var c=2;c{a.r(t),a.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var n=a(87462),r=(a(67294),a(3905));const i={title:"SAUtils"},l=void 0,o={unversionedId:"utilities/sautils",id:"version-3.21/utilities/sautils",title:"SAUtils",description:"Overview",source:"@site/versioned_docs/version-3.21/utilities/sautils.mdx",sourceDirName:"utilities",slug:"/utilities/sautils",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/utilities/sautils",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/utilities/sautils.mdx",tags:[],version:"3.21",frontMatter:{title:"SAUtils"},sidebar:"docs",previous:{title:"Jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/utilities/jasix"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"The SAUtils Menu",id:"the-sautils-menu",children:[],level:2},{value:"Output File Formats",id:"output-file-formats",children:[],level:2}],c={toc:s},d="wrapper";function u(e){let{components:t,...a}=e;return(0,r.kt)(d,(0,n.Z)({},c,a,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"SAUtils is a utility tool that creates binary supplementary annotation files (",(0,r.kt)("em",{parentName:"p"},".nsa, 
"),".gsa, ",(0,r.kt)("em",{parentName:"p"},".npd, "),".nsi, etc.) from original data files (e.g. VCFs, TSVs, XML, HTML, etc.) for various data sources (e.g. ClinVar, dbSNP, gnomAD, etc.). These binary files can be fed into the Nirvana Annotation engine to provide supplementary annotations in the output."),(0,r.kt)("h2",{id:"the-sautils-menu"},"The SAUtils Menu"),(0,r.kt)("p",null,"SAUtils supports building binary files for many data sources. The help menu lists them out in the form of sub-commands."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Nirvana/bin/Release/net6.0/SAUtils.dll\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nUtilities focused on supplementary annotation\n\nUSAGE: dotnet SAUtils.dll [options]\n\nCOMMAND: AutoDownloadGenerate auto download and generate Omim, Clinvar, Clingen\n AaCon create AA conservation database\n ancestralAllele create Ancestral allele database from 1000Genomes data\n ClinGen create ClinGen database\n Downloader download ClinGen database\n clinvar create ClinVar database\n concat merge multiple NSA files for the same data source having non-overlapping regions\n Cosmic create COSMIC database\n CosmicSv create COSMIC SV database\n CosmicFusion create COSMIC gene fusion database\n CosmicCGC create COSMIC cancer gene census database\n CustomGene create custom gene annotation database\n CustomVar create custom variant annotation database\n Dann create DANN database\n Dbsnp create dbSNP database\n Dgv create DGV database\n DiseaseValidity create disease validity database\n DosageMapRegions create dosage map regions\n DosageSensitivity create dosage sensitivity database\n DownloadOmim download OMIM database\n ExtractMiniSA extracts mini SA\n ExtractMiniXml extracts mini XML (ClinVar)\n 
FilterSpliceNetTsv filter SpliceNet predictions\n FusionCatcher create FusionCatcher database\n Gerp create GERP conservation database\n GlobalMinor create global minor allele database\n Gnomad create gnomAD database\n Gnomad-lcr create gnomAD low complexity region database\n GnomadGeneScores create gnomAD gene scores database\n GnomadSV create gnomAD structural variant database\n Index edit an index file\n MitoHet create mitochondrial Heteroplasmy database\n MitomapSvDb create MITOMAP structural variants database\n MitomapVarDb create MITOMAP small variants database\n Omim create OMIM database\n OneKGen create 1000 Genome small variants database\n OneKGenSv create 1000 Genomes structural variants database\n OneKGenSvVcfToBed convert 1000 Genomes structural variants VCF file into a BED-like file\n PhyloP create PhyloP database\n PrimateAi create PrimateAI database\n RefMinor create Reference Minor database from 1000 Genome \n RemapWithDbsnp remap a VCF file given source and destination rsID mappings\n Revel create REVEL database\n SpliceAi create SpliceAI database\n TopMed create TOPMed database\n Gme create GME Variome database\n Decipher create Decipher database\n")),(0,r.kt)("p",null,"You can get further detailed help for each sub-command by typing in the subcommand. 
For example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Nirvana/bin/Release/net6.0/SAUtils.dll clinvar\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll clinvar [options]\nCreates a supplementary database with ClinVar annotations\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --rcv, -i ClinVar Full release XML file\n --vcv, -c ClinVar Variation release XML file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,r.kt)("p",null,"More detailed instructions about each sub-command can be found in documentation of respective data sources."),(0,r.kt)("h2",{id:"output-file-formats"},"Output File Formats"),(0,r.kt)("p",null,"The format of the binary file SAUtils produce depend on the type of annotation data represented in that file (e.g. small variant vs. structural variants vs. genes)."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"File Extension"),(0,r.kt)("th",{parentName:"tr",align:null},"Description"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nsa"),(0,r.kt)("td",{parentName:"tr",align:null},"Small variant annotations (e.g. SNV, insertions, deletions, etc.)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gsa"),(0,r.kt)("td",{parentName:"tr",align:null},"Compact variant annotations (e.g. 
SNV, insertions, deletions, etc.)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".idx"),(0,r.kt)("td",{parentName:"tr",align:null},"Index file")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nsi"),(0,r.kt)("td",{parentName:"tr",align:null},"Interval annotations (e.g. SV, CNVs, intervals)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nga"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene annotations")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".npd"),(0,r.kt)("td",{parentName:"tr",align:null},"Conservation scores")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".rma"),(0,r.kt)("td",{parentName:"tr",align:null},"Reference Minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gfs"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene fusions source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gfj"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene fusions JSON")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".schema"),(0,r.kt)("td",{parentName:"tr",align:null},"JSON schema")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/05c1f6d3.420f1617.js b/assets/js/05c1f6d3.420f1617.js deleted file mode 100644 index dfd44f52..00000000 --- a/assets/js/05c1f6d3.420f1617.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6625],{3905:(_,E,t)=>{t.d(E,{Zo:()=>N,kt:()=>o});var e=t(67294);function A(_,E,t){return E in _?Object.defineProperty(_,E,{value:t,enumerable:!0,configurable:!0,writable:!0}):_[E]=t,_}function n(_,E){var t=Object.keys(_);if(Object.getOwnPropertySymbols){var e=Object.getOwnPropertySymbols(_);E&&(e=e.filter((function(E){return 
Object.getOwnPropertyDescriptor(_,E).enumerable}))),t.push.apply(t,e)}return t}function a(_){for(var E=1;E=0||(A[t]=_[t]);return A}(_,E);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(_);for(e=0;e=0||Object.prototype.propertyIsEnumerable.call(_,t)&&(A[t]=_[t])}return A}var M=e.createContext({}),l=function(_){var E=e.useContext(M),t=E;return _&&(t="function"==typeof _?_(E):a(a({},E),_)),t},N=function(_){var E=l(_.components);return e.createElement(M.Provider,{value:E},_.children)},R="mdxType",i={inlineCode:"code",wrapper:function(_){var E=_.children;return e.createElement(e.Fragment,{},E)}},F=e.forwardRef((function(_,E){var t=_.components,A=_.mdxType,n=_.originalType,M=_.parentName,N=r(_,["components","mdxType","originalType","parentName"]),R=l(t),F=A,o=R["".concat(M,".").concat(F)]||R[F]||i[F]||n;return t?e.createElement(o,a(a({ref:E},N),{},{components:t})):e.createElement(o,a({ref:E},N))}));function o(_,E){var t=arguments,A=E&&E.mdxType;if("string"==typeof _||A){var n=t.length,a=new Array(n);a[0]=F;var r={};for(var M in E)hasOwnProperty.call(E,M)&&(r[M]=E[M]);r.originalType=_,r[R]="string"==typeof _?_:A,a[1]=r;for(var l=2;l{t.r(E),t.d(E,{contentTitle:()=>a,default:()=>R,frontMatter:()=>n,metadata:()=>r,toc:()=>M});var e=t(87462),A=(t(67294),t(3905));const n={},a=void 0,r={unversionedId:"data-sources/gnomad-structural-variants-data_description",id:"version-3.18/data-sources/gnomad-structural-variants-data_description",title:"gnomad-structural-variants-data_description",description:"Bed 
Example",source:"@site/versioned_docs/version-3.18/data-sources/gnomad-structural-variants-data_description.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-data_description",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad-structural-variants-data_description",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad-structural-variants-data_description.md",tags:[],version:"3.18",frontMatter:{}},M=[{value:"Bed Example",id:"bed-example",children:[],level:4},{value:"TSV Example",id:"tsv-example",children:[],level:4},{value:"Structural Variant Type Mapping",id:"structural-variant-type-mapping",children:[],level:4}],l={toc:M},N="wrapper";function R(_){let{components:E,...t}=_;return(0,A.kt)(N,(0,e.Z)({},l,t,{components:E,mdxType:"MDXLayout"}),(0,A.kt)("h4",{id:"bed-example"},"Bed Example"),(0,A.kt)("p",null,"The bed file was obtained from original source for GRCh37"),(0,A.kt)("pre",null,(0,A.kt)("code",{parentName:"pre",className:"language-scss"},"#chrom start end name svtype ALGORITHMS BOTHSIDES_SUPPORT CHR2 CPX_INTERVALS CPX_TYPE END2 ENDEVIDENCE HIGH_SR_BACKGROUND PCRPLUS_DEPLETED PESR_GT_OVERDISPERSION POS2 PROTEIN_CODING__COPY_GAIN PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC PROTEIN_CODING__INTRONIC PROTEIN_CODING__INV_SPAN PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER PROTEIN_CODING__UTR SOURCE STRANDS SVLEN SVTYPE UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN AC AF N_BI_GENOS N_HOMREF N_HET N_HOMALT FREQ_HOMREF FREQ_HET FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF MALE_N_HET MALE_N_HOMALT MALE_FREQ_HOMREF MALE_FREQ_HET MALE_FREQ_HOMALT MALE_N_HEMIREF MALE_N_HEMIALT MALE_FREQ_HEMIREF MALE_FREQ_HEMIALT PAR FEMALE_AN FEMALE_AC FEMALE_AF FEMALE_N_BI_GENOS FEMALE_N_HOMREF FEMALE_N_HET FEMALE_N_HOMALT 
FEMALE_FREQ_HOMREF FEMALE_FREQ_HET FEMALE_FREQ_HOMALT POPMAX_AF AFR_AN AFR_AC AFR_AF AFR_N_BI_GENOS AFR_N_HOMREF AFR_N_HET AFR_N_HOMALT AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF AFR_MALE_N_HET AFR_MALE_N_HOMALT AFR_MALE_FREQ_HOMREF AFR_MALE_FREQ_HET AFR_MALE_FREQ_HOMALT AFR_MALE_N_HEMIREF AFR_MALE_N_HEMIALT AFR_MALE_FREQ_HEMIREF AFR_MALE_FREQ_HEMIALT AFR_FEMALE_AN AFR_FEMALE_AC AFR_FEMALE_AF AFR_FEMALE_N_BI_GENOS AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT AMR_AN AMR_AC AMR_AF AMR_N_BI_GENOS AMR_N_HOMREF AMR_N_HET AMR_N_HOMALT AMR_FREQ_HOMREF AMR_FREQ_HET AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS AMR_MALE_N_HOMREF AMR_MALE_N_HET AMR_MALE_N_HOMALT AMR_MALE_FREQ_HOMREF AMR_MALE_FREQ_HET AMR_MALE_FREQ_HOMALT AMR_MALE_N_HEMIREF AMR_MALE_N_HEMIALT AMR_MALE_FREQ_HEMIREF AMR_MALE_FREQ_HEMIALT AMR_FEMALE_AN AMR_FEMALE_AC AMR_FEMALE_AF AMR_FEMALE_N_BI_GENOS AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT EAS_AN EAS_AC EAS_AF EAS_N_BI_GENOS EAS_N_HOMREF EAS_N_HET EAS_N_HOMALT EAS_FREQ_HOMREF EAS_FREQ_HET EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF EAS_MALE_N_HET EAS_MALE_N_HOMALT EAS_MALE_FREQ_HOMREF EAS_MALE_FREQ_HET EAS_MALE_FREQ_HOMALT EAS_MALE_N_HEMIREF EAS_MALE_N_HEMIALT EAS_MALE_FREQ_HEMIREF EAS_MALE_FREQ_HEMIALT EAS_FEMALE_AN EAS_FEMALE_AC EAS_FEMALE_AF EAS_FEMALE_N_BI_GENOS EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT EUR_AN EUR_AC EUR_AF EUR_N_BI_GENOS EUR_N_HOMREF EUR_N_HET EUR_N_HOMALT EUR_FREQ_HOMREF EUR_FREQ_HET EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF EUR_MALE_N_HET EUR_MALE_N_HOMALT EUR_MALE_FREQ_HOMREF EUR_MALE_FREQ_HET 
EUR_MALE_FREQ_HOMALT EUR_MALE_N_HEMIREF EUR_MALE_N_HEMIALT EUR_MALE_FREQ_HEMIREF EUR_MALE_FREQ_HEMIALT EUR_FEMALE_AN EUR_FEMALE_AC EUR_FEMALE_AF EUR_FEMALE_N_BI_GENOS EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT OTH_AN OTH_AC OTH_AF OTH_N_BI_GENOS OTH_N_HOMREF OTH_N_HET OTH_N_HOMALT OTH_FREQ_HOMREF OTH_FREQ_HET OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF OTH_MALE_N_HET OTH_MALE_N_HOMALT OTH_MALE_FREQ_HOMREF OTH_MALE_FREQ_HET OTH_MALE_FREQ_HOMALT OTH_MALE_N_HEMIREF OTH_MALE_N_HEMIALT OTH_MALE_FREQ_HEMIREF OTH_MALE_FREQ_HEMIALT OTH_FEMALE_AN OTH_FEMALE_AC OTH_FEMALE_AF OTH_FEMALE_N_BI_GENOS OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET OTH_FEMALE_N_HOMALT OTH_FEMALE_FREQ_HOMREF OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT FILTER\n1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 
31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 0.017857100814580917 0.0UNRESOLVED \n")),(0,A.kt)("h4",{id:"tsv-example"},"TSV Example"),(0,A.kt)("p",null,"The tsv was obtained from lifted over dataset created by dbVar for GRCh38"),(0,A.kt)("pre",null,(0,A.kt)("code",{parentName:"pre",className:"language-scss"},"#variant_call_accession variant_call_id variant_call_type experiment_id sample_id sampleset_id assembly chrcontig outer_start start inner_start inner_stop stop outer_stop insertion_length variant_region_acc variant_region_id copy_number description validation zygosity origin phenotype hgvs_name placement_method placement_rank placements_per_assembly remap_alignment remap_best_within_cluster remap_coverage remap_diff_chr remap_failure_code allele_count allele_frequency allele_number\nnssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 
AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\n")),(0,A.kt)("h4",{id:"structural-variant-type-mapping"},"Structural Variant Type Mapping"),(0,A.kt)("p",null,"The source files represented the structural variants with keys using various naming conventions.\nIn the Nirvana JSON output, these keys will be mapped according to the following. "),(0,A.kt)("table",null,(0,A.kt)("thead",{parentName:"table"},(0,A.kt)("tr",{parentName:"thead"},(0,A.kt)("th",{parentName:"tr",align:null},"Nirvana JSON SV Type Key"),(0,A.kt)("th",{parentName:"tr",align:null},"GRCh37 Source SV Type Key"),(0,A.kt)("th",{parentName:"tr",align:null},"GRCh38 Source SV Type Key"))),(0,A.kt)("tbody",{parentName:"table"},(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"copy_number_variation"),(0,A.kt)("td",{parentName:"tr",align:null}),(0,A.kt)("td",{parentName:"tr",align:null},"copy number variation")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"deletion"),(0,A.kt)("td",{parentName:"tr",align:null},"DEL, CN=0"),(0,A.kt)("td",{parentName:"tr",align:null},"deletion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"duplication"),(0,A.kt)("td",{parentName:"tr",align:null},"DUP"),(0,A.kt)("td",{parentName:"tr",align:null},"duplication")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS"),(0,A.kt)("td",{parentName:"tr",align:null},"insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"inversion"),(0,A.kt)("td",{parentName:"tr",align:null},"INV"),(0,A.kt)("td",{parentName:"tr",align:null},"inversion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME"),(0,A.kt)("td",{parentName:"tr",align:null},"mobile element 
insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:ALU"),(0,A.kt)("td",{parentName:"tr",align:null},"alu insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:LINE1"),(0,A.kt)("td",{parentName:"tr",align:null},"line1 insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:SVA"),(0,A.kt)("td",{parentName:"tr",align:null},"sva insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"structural alteration"),(0,A.kt)("td",{parentName:"tr",align:null}),(0,A.kt)("td",{parentName:"tr",align:null},"sequence alteration")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"complex_structural_alteration"),(0,A.kt)("td",{parentName:"tr",align:null},"CPX"),(0,A.kt)("td",{parentName:"tr",align:null})))))}R.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0654e1ab.c1b45991.js b/assets/js/0654e1ab.c1b45991.js deleted file mode 100644 index 57f24784..00000000 --- a/assets/js/0654e1ab.c1b45991.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1931],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>b});var n=a(67294);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function l(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var 
i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),c=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},d=function(e){var t=c(e.components);return n.createElement(s.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},p=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),u=c(a),p=r,b=u["".concat(s,".").concat(p)]||u[p]||m[p]||i;return a?n.createElement(b,l(l({ref:t},d),{},{components:a})):n.createElement(b,l({ref:t},d))}));function b(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=a.length,l=new Array(i);l[0]=p;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[u]="string"==typeof e?e:r,l[1]=o;for(var c=2;c{a.r(t),a.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var n=a(87462),r=(a(67294),a(3905));const i={title:"SAUtils"},l=void 0,o={unversionedId:"utilities/sautils",id:"version-3.18/utilities/sautils",title:"SAUtils",description:"Overview",source:"@site/versioned_docs/version-3.18/utilities/sautils.mdx",sourceDirName:"utilities",slug:"/utilities/sautils",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/utilities/sautils",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/utilities/sautils.mdx",tags:[],version:"3.18",frontMatter:{title:"SAUtils"},sidebar:"docs",previous:{title:"Jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/utilities/jasix"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"The SAUtils Menu",id:"the-sautils-menu",children:[],level:2},{value:"Output File 
Formats",id:"output-file-formats",children:[],level:2}],c={toc:s},d="wrapper";function u(e){let{components:t,...a}=e;return(0,r.kt)(d,(0,n.Z)({},c,a,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"SAUtils is a utility tool that creates binary supplementary annotation files (",(0,r.kt)("em",{parentName:"p"},".nsa, "),".gsa, ",(0,r.kt)("em",{parentName:"p"},".npd, "),".nsi, etc.) from original data files (e.g. VCFs, TSVs, XML, HTML, etc.) for various data sources (e.g. ClinVar, dbSNP, gnomAD, etc.). These binary files can be fed into the Nirvana Annotation engine to provide supplementary annotations in the output."),(0,r.kt)("h2",{id:"the-sautils-menu"},"The SAUtils Menu"),(0,r.kt)("p",null,"SAUtils supports building binary files for many data sources. The help menu lists them out in the form of sub-commands."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.18.0\n---------------------------------------------------------------------------\n\nUtilities focused on supplementary annotation\n\nUSAGE: dotnet SAUtils.dll [options]\n\nCOMMAND: AaCon create AA conservation database\n ancestralAllele create Ancestral allele database from 1000Genomes data\n ClinGen create ClinGen database\n clinvar create ClinVar database\n concat merge multiple NSA files for the same data source having non-overlapping regions\n Cosmic create COSMIC database\n CosmicSv create COSMIC SV database\n CosmicFusion create COSMIC gene fusion database\n CustomGene create custom gene annotation database\n CustomVar create custom variant annotation database\n Dann create DANN database\n Dbsnp create dbSNP database\n Dgv create DGV database\n DiseaseValidity create disease validity database\n DosageMapRegions create 
dosage map regions\n DosageSensitivity create dosage sensitivity database\n DownloadOmim download OMIM database\n ExacScores create ExAC gene scores database\n ExtractMiniSA extracts mini SA\n ExtractMiniXml extracts mini XML (ClinVar)\n FilterSpliceNetTsv filter SpliceNet predictions\n FusionCatcher create FusionCatcher database\n Gerp create GERP conservation database\n GlobalMinor create global minor allele database\n GME Variome create GME Variome database\n Gnomad create gnomAD database\n Gnomad-lcr create gnomAD low complexity region database\n GnomadGeneScores create gnomAD gene scores database\n Index edit an index file\n MitoHet create mitochondrial Heteroplasmy database\n MitomapSvDb create MITOMAP structural variants database\n MitomapVarDb create MITOMAP small variants database\n Omim create OMIM database\n OneKGen create 1000 Genome small variants database\n OneKGenSv create 1000 Genomes structural variants database\n OneKGenSvVcfToBed convert 1000 Genomes structural variants VCF file into a BED-like file\n PhyloP create PhyloP database\n PrimateAi create PrimateAI database\n RefMinor create Reference Minor database from 1000 Genome\n RemapWithDbsnp remap a VCF file given source and destination rsID mappings\n Revel create REVEL database\n SpliceAi create SpliceAI database\n TopMed create TOPMed database\n")),(0,r.kt)("p",null,"You can get further detailed help for each sub-command by typing in the subcommand. 
For example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll clinvar\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.18.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll clinvar [options]\nCreates a supplementary database with ClinVar annotations\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --rcv, -i ClinVar Full release XML file\n --vcv, -c ClinVar Variation release XML file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,r.kt)("p",null,"More detailed instructions about each sub-command can be found in documentation of respective data sources."),(0,r.kt)("h2",{id:"output-file-formats"},"Output File Formats"),(0,r.kt)("p",null,"The format of the binary file SAUtils produce depend on the type of annotation data represented in that file (e.g. small variant vs. structural variants vs. genes)."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"File Extension"),(0,r.kt)("th",{parentName:"tr",align:null},"Description"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nsa"),(0,r.kt)("td",{parentName:"tr",align:null},"Small variant annotations (e.g. SNV, insertions, deletions, etc.)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gsa"),(0,r.kt)("td",{parentName:"tr",align:null},"Compact variant annotations (e.g. 
SNV, insertions, deletions, etc.)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".idx"),(0,r.kt)("td",{parentName:"tr",align:null},"Index file")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nsi"),(0,r.kt)("td",{parentName:"tr",align:null},"Interval annotations (e.g. SV, CNVs, intervals)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nga"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene annotations")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".npd"),(0,r.kt)("td",{parentName:"tr",align:null},"Conservation scores")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".rma"),(0,r.kt)("td",{parentName:"tr",align:null},"Reference Minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gfs"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene fusions source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gfj"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene fusions JSON")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".schema"),(0,r.kt)("td",{parentName:"tr",align:null},"JSON schema")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0716d66c.bb39a370.js b/assets/js/0716d66c.bb39a370.js deleted file mode 100644 index 3e6a1f94..00000000 --- a/assets/js/0716d66c.bb39a370.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5442,7185,5330,5606,8001,4680,1562,6729,7454,8113,1865,6923,6446,9653,6754,6698,9351,3499,880,6776,3130,2031,4226],{3905:(t,e,a)=>{a.d(e,{Zo:()=>m,kt:()=>c});var n=a(67294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var 
n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),d=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},m=function(t){var e=d(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},g=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=o(t,["components","mdxType","originalType","parentName"]),s=d(a),g=r,c=s["".concat(p,".").concat(g)]||s[g]||u[g]||l;return a?n.createElement(c,i(i({ref:e},m),{},{components:a})):n.createElement(c,i({ref:e},m))}));function c(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=g;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[s]="string"==typeof t?t:r,i[1]=o;for(var d=2;d{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.18/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.18/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. Non-zero integer.")))))}s.isMDXComponent=!0},17656:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.18/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0},70163:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.18/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,r.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}s.isMDXComponent=!0},67769:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.18/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clingen-dosage-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is 
associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,r.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}s.isMDXComponent=!0},95733:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 
0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.18/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID 
(MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted"),(0,r.kt)("li",{parentName:"ul"},"no known disease relationship")))}s.isMDXComponent=!0},96177:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-json",id:"version-3.18/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clingen-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). 
Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}s.isMDXComponent=!0},41384:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clinvar-json",id:"version-3.18/data-sources/clinvar-json",title:"clinvar-json",description:"small 
variants:",source:"@site/versioned_docs/version-3.18/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clinvar-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"small variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"large variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n "pathogenic"\n ], \n "lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n 
"significance":[\n "pathogenic"\n ],\n "lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely 
pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}s.isMDXComponent=!0},11030:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dann-json",id:"version-3.18/data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/dann-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 
0.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1.0")))))}s.isMDXComponent=!0},54909:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.18/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/dbsnp-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"dbSNP 
rsIDs")))))}s.isMDXComponent=!0},22529:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/decipher-json",id:"version-3.18/data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/decipher-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n }\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed deletions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"total # of samples")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% annotation overlap")))))}s.isMDXComponent=!0},61201:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gerp-json",id:"version-3.18/data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gerp-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 1.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to +\u221e")))))}s.isMDXComponent=!0},97398:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gme-json",id:"version-3.18/data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.18/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gme-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}s.isMDXComponent=!0},77953:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var 
n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.18/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad-lof-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pLi"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pNull"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = 
expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pRec"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"synZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"misZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}s.isMDXComponent=!0},42182:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.18/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coverage"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. 
Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}s.isMDXComponent=!0},40853:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-json",id:"version-3.18/data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.18/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad-structural-variants-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": [\n {\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n "variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n "afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n 
}\n]\n\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"chromosome number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"structural variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"gnomAD ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,r.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,r.kt)("tr",{parentNa
me:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}s.isMDXComponent=!0},91471:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.18/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}s.isMDXComponent=!0},81167:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.18/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n 
"annotationOverlap":0.42405\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}s.isMDXComponent=!0},43056:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/omim-json",id:"version-3.18/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/omim-json.md",tags:[],version:"3.18",frontMatter:{}},p=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}s.isMDXComponent=!0},20837:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/primate-ai-json",id:"version-3.18/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/primate-ai-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}s.isMDXComponent=!0},30935:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/revel-json",id:"version-3.18/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/revel-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n 
"score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}s.isMDXComponent=!0},43247:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/splice-ai-json",id:"version-3.18/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/splice-ai-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}s.isMDXComponent=!0},89201:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/topmed-json",id:"version-3.18/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/topmed-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}s.isMDXComponent=!0},85982:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>M,default:()=>F,frontMatter:()=>T,metadata:()=>C,toc:()=>S});var n=a(87462),r=(a(67294),a(3905)),l=a(70163),i=a(41384),o=a(96177),p=a(67769),d=a(95733),m=a(54909),s=a(20837),u=a(30935),g=a(11030),c=a(61201),k=a(43247),N=a(91471),f=a(81167),y=a(42182),h=a(77953),b=a(24029),v=a(17656),A=a(43056),j=a(89201),D=a(40853),w=a(97398),I=a(22529);const T={title:"Nirvana JSON File Format"},M=void 
0,C={unversionedId:"file-formats/nirvana-json-file-format",id:"version-3.18/file-formats/nirvana-json-file-format",title:"Nirvana JSON File Format",description:"Overview",source:"@site/versioned_docs/version-3.18/file-formats/nirvana-json-file-format.mdx",sourceDirName:"file-formats",slug:"/file-formats/nirvana-json-file-format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/file-formats/nirvana-json-file-format",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/file-formats/nirvana-json-file-format.mdx",tags:[],version:"3.18",frontMatter:{title:"Nirvana JSON File Format"},sidebar:"docs",previous:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/topmed"},next:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/file-formats/custom-annotations"}},S=[{value:"Overview",id:"overview",children:[{value:"Conventions",id:"conventions",children:[],level:3},{value:"JSON Layout",id:"json-layout",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Header",id:"header",children:[{value:"Data Source",id:"data-source",children:[],level:4},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:4}],level:2},{value:"Positions",id:"positions",children:[{value:"ClinGen",id:"clingen",children:[],level:3},{value:"1000 Genomes (SV)",id:"1000-genomes-sv",children:[],level:3},{value:"gnomAD (SV)",id:"gnomad-sv",children:[],level:3},{value:"MITOMAP (SV)",id:"mitomap-sv",children:[],level:3}],level:2},{value:"Samples",id:"samples",children:[],level:2},{value:"Variants",id:"variants",children:[{value:"Transcripts",id:"transcripts",children:[{value:"PolyPhen",id:"polyphen",children:[],level:4},{value:"SIFT",id:"sift",children:[],level:4},{value:"Amino Acid Conservation",id:"amino-acid-conservation",children:[],level:4},{value:"Gene 
Fusions",id:"gene-fusions",children:[],level:4},{value:"Fusion",id:"fusion",children:[],level:4}],level:3},{value:"Regulatory Regions",id:"regulatory-regions",children:[{value:"Regulatory Types",id:"regulatory-types",children:[],level:4},{value:"Regulatory Consequences",id:"regulatory-consequences",children:[],level:4}],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3},{value:"1000 Genomes",id:"1000-genomes",children:[],level:3},{value:"DANN",id:"dann",children:[],level:3},{value:"dbSNP",id:"dbsnp",children:[],level:3},{value:"DECIPHER",id:"decipher",children:[],level:3},{value:"GERP",id:"gerp",children:[],level:3},{value:"GME Variome",id:"gme-variome",children:[],level:3},{value:"gnomAD",id:"gnomad",children:[],level:3},{value:"MITOMAP",id:"mitomap",children:[],level:3},{value:"Primate AI",id:"primate-ai",children:[],level:3},{value:"REVEL",id:"revel",children:[],level:3},{value:"Splice AI",id:"splice-ai",children:[],level:3},{value:"TOPMed",id:"topmed",children:[],level:3}],level:2},{value:"Genes",id:"genes",children:[{value:"OMIM",id:"omim",children:[],level:3},{value:"gnomAD LoF Gene Metrics",id:"gnomad-lof-gene-metrics",children:[],level:3},{value:"ClinGen Disease Validity",id:"clingen-disease-validity",children:[],level:3}],level:2}],R={toc:S},O="wrapper";function F(t){let{components:e,...T}=t;return(0,r.kt)(O,(0,n.Z)({},R,T,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("h3",{id:"conventions"},"Conventions"),(0,r.kt)("p",null,"In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. As such, we have several conventions that are useful to know about:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"With boolean key/value pairs, we only output the keys that have a true value. I.e. 
there's no reason to display ",(0,r.kt)("inlineCode",{parentName:"li"},'"isStructuralVariant":false')," a few million times when annotating a small variant VCF."),(0,r.kt)("li",{parentName:"ul"},"When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. Nirvana treats periods like empty or null strings and therefore will not output those entries.")),(0,r.kt)("h3",{id:"json-layout"},"JSON Layout"),(0,r.kt)("p",null,(0,r.kt)("img",{src:a(10886).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"In general, each position corresponds to a row in the original VCF file."),(0,r.kt)("p",{parentName:"div"},"For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section."))),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 
5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"We've put together a ",(0,r.kt)("a",{parentName:"p",href:"../introduction/parsing-json"},"new section that discusses how to parse our JSON files")," easily using examples in a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-python.ipynb"},"Python Jupyter notebook")," and a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-r.ipynb"},"R version")," as well. In addition, we have information about how to quickly dump content from our JSON file using a tabix-like utility called JASIX."))),(0,r.kt)("h2",{id:"header"},"Header"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"Nirvana 3.0.0-alpha.5+g6c52e247",\n "creationTime":"2017-06-14 15:53:13",\n "genomeAssembly":"GRCh37",\n "dataSources":[\n {\n "name":"OMIM",\n "version":"unknown",\n "description":"An Online Catalog of Human Genes and Genetic Disorders",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"VEP",\n "version":"84",\n "description":"BothRefSeqAndEnsembl",\n "releaseDate":"2017-01-16"\n },\n {\n "name":"ClinVar",\n "version":"20170503",\n "description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"phyloP",\n "version":"hg19",\n "description":"46 way conservation score between humans and 45 other vertebrates",\n "releaseDate":"2009-11-10"\n }\n ],\n "samples":[\n "NA12878",\n "NA12891",\n "NA12892"\n ]\n 
},\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotator"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the name of the annotator and the current version")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"creationTime"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd hh:mm:ss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genomeAssembly"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#genome-assemblies"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"schemaVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"incremented whenever the core structure of the JSON file introduces breaking changes")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#data-source"},"Data Source entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"samples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the order of these sample names will be used throughout the JSON file when enumerating samples")))),(0,r.kt)("h4",{id:"data-source"},"Data Source"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"version"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"optional description of the data source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"releaseDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")))),(0,r.kt)("h4",{id:"genome-assemblies"},"Genome Assemblies"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"GRCh37"),(0,r.kt)("li",{parentName:"ul"},"GRCh38"),(0,r.kt)("li",{parentName:"ul"},"hg19"),(0,r.kt)("li",{parentName:"ul"},"SARSCoV2")),(0,r.kt)("h2",{id:"positions"},"Positions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"positions":[\n {\n "chromosome":"chr2",\n "position":48010488,\n "repeatUnit":"GGCCCC",\n "refRepeatCount":3,\n "svEnd":48020488,\n "refAllele":"G",\n "altAlleles":[\n "A",\n "GT"\n ],\n "quality":461,\n "filters":[\n "PASS"\n ],\n "ciPos":[\n -170,\n 170\n ],\n "ciEnd":[\n -175,\n 
175\n ],\n "svLength":1000,\n "strandBias":1.23,\n "jointSomaticNormalQuality":29,\n "cytogeneticBand":"2p16.3",\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Variant Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"position"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (1-based notation). 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnit"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refRepeatCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"quality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (Normally an integer, but some variant callers using floating point. 
Has been observed as high as 500k)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"filters"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svLength"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"strandBias"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"small variant"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by GATK (from SB)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"jointSomaticNormalQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by the Manta variant caller (SOMATICSCORE)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cytogeneticBand"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"e.g. 
17p13.1")))),(0,r.kt)("h3",{id:"clingen"},"ClinGen"),(0,r.kt)(o.default,{mdxType:"ClinGen"}),(0,r.kt)(p.default,{mdxType:"ClinGenDosage"}),(0,r.kt)("h3",{id:"1000-genomes-sv"},"1000 Genomes (SV)"),(0,r.kt)(v.default,{mdxType:"ThousandGenomesSV"}),(0,r.kt)("h3",{id:"gnomad-sv"},"gnomAD (SV)"),(0,r.kt)(D.default,{mdxType:"GnomadSV"}),(0,r.kt)("h3",{id:"mitomap-sv"},"MITOMAP (SV)"),(0,r.kt)(f.default,{mdxType:"MitoMapSV"}),(0,r.kt)("h2",{id:"samples"},"Samples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n "totalDepth":57,\n "genotypeQuality":12,\n "copyNumber":3,\n "repeatUnitCounts":[\n 10,\n 20\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "failedFilter":true,\n "splitReadCounts":[\n 10,\n 20\n ],\n "pairedEndReadCounts":[\n 10,\n 20\n ],\n "isDeNovo":true,\n "diseaseAffectedStatuses":[\n "-"\n ],\n "artifactAdjustedQualityScore":89.3,\n "likelihoodRatioQualityScore":78.2,\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"VCF"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotype"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GT"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantFrequencies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"VF, AD"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"totalDepth"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DP"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotypeQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values. Typically maxes out at 99")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"copyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"minorHaplotypeCopyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"MCN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnitCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"REPCN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleDepths"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AD"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer 
values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"FT"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"splitReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pairedEndReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"PR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDeNovo"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DN"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"deNovoQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DQ"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseaseAffectedStatuses"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DST"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"artifactAdjustedQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. 
Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"likelihoodRatioQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"LQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lossOfHeterozygosity"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CN, MCN"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"somaticQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SQ"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"VF"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 100. 2 decimal places. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"binCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"BC"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Empty Samples")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"If a sample does not contain any entries, we will create a sample object that contains the ",(0,r.kt)("inlineCode",{parentName:"p"},"isEmpty")," key. 
This ensures that sample ordering is preserved while indicating that a sample is intentionally empty."),(0,r.kt)("pre",{parentName:"div"},(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "isEmpty":true\n }\n],\n')))),(0,r.kt)("h2",{id:"variants"},"Variants"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "isReferenceMinorAllele":true,\n "isStructuralVariant":true,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "isRecomposedVariant":true,\n "linkedVids":["2:48010488:GTA:ATC"],\n "hgvsg":"NC_000002.11:g.48010488G>A",\n "phylopScore":0.459\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"vid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"Variant Identifiers"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReferenceMinorAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a reference minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isStructuralVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a structural variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inLowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant lies in a low complexity region (gnomAD low complexity regions)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the reference allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the alternate allele.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"uses\xa0",(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"Sequence Ontology sequence 
alterations"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the decomposed variant has been used to create another recomposed variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isRecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is recomposed from two or more decomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"linkedVids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"list of ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"VIDs")," for variants connecting decomposed and recomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsg"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS g. notation")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phyloP conservation score. 
Range: -14.08 to 6.424")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Reference Minor Alleles")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Nirvana supports annotating reference minor alleles. In such a case, ",(0,r.kt)("inlineCode",{parentName:"p"},"refAllele")," will be replaced by the global major allele and ",(0,r.kt)("inlineCode",{parentName:"p"},"altAllele")," will be replaced with the original reference allele."))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Flagging Decomposed & Recomposed Variants")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isDecomposedVariant":true'),"."),(0,r.kt)("p",{parentName:"div"},"Similarly, the 
recomposed variant will be shown as a new VCF position. This recomposed variant will be flagged with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isRecomposedVariant":true'),"."))),(0,r.kt)("h3",{id:"transcripts"},"Transcripts"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"transcripts":[\n {\n "transcript":"ENST00000445503.1",\n "source":"Ensembl",\n "bioType":"nonsense_mediated_decay",\n "codons":"gGg/gAg",\n "aminoAcids":"G/E",\n "cdnaPos":"268",\n "cdsPos":"116",\n "exons":"1/9",\n "introns":"1/8",\n "proteinPos":"39",\n "geneId":"ENSG00000116062",\n "hgnc":"MSH6",\n "consequence":[\n "missense_variant",\n "NMD_transcript_variant"\n ],\n "hgvsc":"ENST00000445503.1:c.116G>A",\n "hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",\n "geneFusion":{\n "exon":6,\n "intron":5,\n "fusions":[\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",\n "exon":3,\n "intron":2\n },\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",\n "exon":2,\n "intron":1\n }\n ]\n },\n "isCanonical":true,\n "polyPhenScore":0.95,\n "polyPhenPrediction":"probably damaging",\n "proteinId":"ENSP00000405294.1",\n "siftScore":0.61,\n "siftPrediction":"tolerated",\n "completeOverlap":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript ID. e.g. 
ENST00000445503.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"source"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"RefSeq / Ensembl")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,r.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"codons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdnaPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdsPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exons affected by the variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"introns"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"introns affected by the 
variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/obob.cgi"},"Sequence Ontology Consequences"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS protein nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneFusion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#gene-fusions"},"Gene Fusions entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isCanonical"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a canonical 
transcript")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#polyphen"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"protein ID. E.g. ENSP00000405294.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#sift"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"completeOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this transcript is completely overlapped by the variant")))),(0,r.kt)("h4",{id:"polyphen"},"PolyPhen"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"probably damaging"),(0,r.kt)("li",{parentName:"ul"},"possibly damaging"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"unknown")),(0,r.kt)("h4",{id:"sift"},"SIFT"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"tolerated"),(0,r.kt)("li",{parentName:"ul"},"deleterious"),(0,r.kt)("li",{parentName:"ul"},"tolerated - low 
confidence"),(0,r.kt)("li",{parentName:"ul"},"deleterious - low confidence")),(0,r.kt)("h4",{id:"amino-acid-conservation"},"Amino Acid Conservation"),(0,r.kt)(l.default,{mdxType:"AminoAcidConservation"}),(0,r.kt)("h4",{id:"gene-fusions"},"Gene Fusions"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"fusions"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#fusion"},"Fusion entry below"))))),(0,r.kt)("h4",{id:"fusion"},"Fusion"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the other breakpoint was 
located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the other breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature describing the two genes and the transcripts that are fused along with")))),(0,r.kt)("h3",{id:"regulatory-regions"},"Regulatory Regions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"regulatoryRegions":[\n {\n "id":"ENSR00001542175",\n "type":"promoter",\n "consequence":[\n "regulatory_region_variant"\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"type"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-types"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-consequences"},"possible values below"))))),(0,r.kt)("h4",{id:"regulatory-types"},"Regulatory 
Types"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CTCF_binding_site"),(0,r.kt)("li",{parentName:"ul"},"enhancer"),(0,r.kt)("li",{parentName:"ul"},"open_chromatin_region"),(0,r.kt)("li",{parentName:"ul"},"promoter"),(0,r.kt)("li",{parentName:"ul"},"promoter_flanking_region"),(0,r.kt)("li",{parentName:"ul"},"TF_binding_site")),(0,r.kt)("h4",{id:"regulatory-consequences"},"Regulatory Consequences"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"regulatory_region_variant"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_ablation"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_amplification"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_truncation")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)(i.default,{mdxType:"ClinVar"}),(0,r.kt)("h3",{id:"1000-genomes"},"1000 Genomes"),(0,r.kt)(b.default,{mdxType:"ThousandGenomesSmall"}),(0,r.kt)("h3",{id:"dann"},"DANN"),(0,r.kt)(g.default,{mdxType:"DANN"}),(0,r.kt)("h3",{id:"dbsnp"},"dbSNP"),(0,r.kt)(m.default,{mdxType:"DbSNP"}),(0,r.kt)("h3",{id:"decipher"},"DECIPHER"),(0,r.kt)(I.default,{mdxType:"DECIPHER"}),(0,r.kt)("h3",{id:"gerp"},"GERP"),(0,r.kt)(c.default,{mdxType:"GERP"}),(0,r.kt)("h3",{id:"gme-variome"},"GME Variome"),(0,r.kt)(w.default,{mdxType:"GME"}),(0,r.kt)("h3",{id:"gnomad"},"gnomAD"),(0,r.kt)(y.default,{mdxType:"GnomadSmall"}),(0,r.kt)("h3",{id:"mitomap"},"MITOMAP"),(0,r.kt)(N.default,{mdxType:"MitoMapSmall"}),(0,r.kt)("h3",{id:"primate-ai"},"Primate AI"),(0,r.kt)(s.default,{mdxType:"PrimateAI"}),(0,r.kt)("h3",{id:"revel"},"REVEL"),(0,r.kt)(u.default,{mdxType:"REVEL"}),(0,r.kt)("h3",{id:"splice-ai"},"Splice AI"),(0,r.kt)(k.default,{mdxType:"SpliceAI"}),(0,r.kt)("h3",{id:"topmed"},"TOPMed"),(0,r.kt)(j.default,{mdxType:"TOPMed"}),(0,r.kt)("h2",{id:"genes"},"Genes"),(0,r.kt)("p",null,"Nirvana repots gene annotations for all genes that have an overlapping variant with the exception of flanking variants (i.e. 
variants that only cause upstream_gene_variant or downstream_gene_variant)."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"genes":[\n {\n "name":"MSH6",\n "hgncId":7329,\n "summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",\n /* this is where gene-level data sources can be found e.g. 
OMIM */\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgncId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"summary"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"short description of the gene from ",(0,r.kt)("a",{parentName:"td",href:"https://www.omim.org/"},"OMIM"))))),(0,r.kt)("h3",{id:"omim"},"OMIM"),(0,r.kt)(A.default,{mdxType:"Omim"}),(0,r.kt)("h3",{id:"gnomad-lof-gene-metrics"},"gnomAD LoF Gene Metrics"),(0,r.kt)(h.default,{mdxType:"GnomadGeneLof"}),(0,r.kt)("h3",{id:"clingen-disease-validity"},"ClinGen Disease Validity"),(0,r.kt)(d.default,{mdxType:"ClinGenDiseaseValidity"}))}F.isMDXComponent=!0},10886:(t,e,a)=>{a.d(e,{Z:()=>n});const n=a.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/07bac56e.b99e9bbd.js b/assets/js/07bac56e.b99e9bbd.js new file mode 100644 index 00000000..ae411794 --- /dev/null +++ b/assets/js/07bac56e.b99e9bbd.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1342,8633],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>g});var a=n(7294);function r(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},p=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),u=c(n),p=r,g=u["".concat(s,".").concat(p)]||u[p]||m[p]||o;return n?a.createElement(g,i(i({ref:t},d),{},{components:n})):a.createElement(g,i({ref:t},d))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=p;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:r,i[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(7462),r=(n(7294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/gme-json",id:"data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gme-json.md",tags:[],version:"current",frontMatter:{}},s=[],c={toc:s},d="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}u.isMDXComponent=!0},2812:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>i,metadata:()=>s,toc:()=>c});var a=n(7462),r=(n(7294),n(3905)),o=n(8036);const i={title:"GME 
Variome"},l=void 0,s={unversionedId:"data-sources/gme",id:"data-sources/gme",title:"GME Variome",description:"Overview",source:"@site/docs/data-sources/gme.mdx",sourceDirName:"data-sources",slug:"/data-sources/gme",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gme.mdx",tags:[],version:"current",frontMatter:{title:"GME Variome"},sidebar:"docs",previous:{title:"GERP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp"},next:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad"}},c=[{value:"Overview",id:"overview",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"GRCh37 liftover",id:"grch37-liftover",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON output",id:"json-output",children:[],level:2}],d={toc:c},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"http://igm.ucsd.edu/gme/index.php"},"Greater Middle East (GME) Variome")," Project is aimed at generating a coding base reference for the countries found in the Greater Middle East. 
Illumina Connected Annotations presents variant frequencies for the Greater Middle Eastern population."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Scott, E. M., Halees, A., Itan, Y., Spencer, E. G., He, Y., Azab, M. A., Gabriel, S. B., Belkadi, A., Boisson, B., Abel, L., Clark, A. G., Greater Middle East Variome Consortium, Alkuraya, F. S., Casanova, J. L., & Gleeson, J. G. (2016). Characterization of Greater Middle Eastern genetic variation for enhanced disease gene discovery. ",(0,r.kt)("em",{parentName:"p"},"Nature genetics"),", 48(9), 1071\u20131076. 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/ng.3592"},"https://doi.org/10.1038/ng.3592")))),(0,r.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chrom pos ref alt AA filter FunctionGVS geneFunction Gene GeneID SIFT_pred GERP++ AF GME_GC GME_AC GME_AF NWA NEA AP Israel SD TP CA FunctionGVS_new Priority Polyphen2_HVAR_pred LRT_pred MutationTaster_pred rsid OMIM_MIM OMIM_Disease AA_AC EA_AC rsid_link position_link\n1 69134 A G A VQSRTrancheSNP99.90to100.00 nonsynonymous_SNV exonic OR4F5 79501 T 2.31 96:0:5 10,192 0.04950495049504951 4:0:0 59:0:2 12:0:0 0:0:0 6:0:0 9:0:2 13:0:2 nonsynonymous_SNV MODERATE B N N none - - none none - http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69134-69133\n1 69270 A G A PASS synonymous_SNV exonic OR4F5 79501 . . 93:38:240 518,224 0.6981132075471698 5:5:11 63:30:86 12:5:28 1:0:2 2:2:18 7:3:46 7:2:52 synonymous_SNV LOW . . . rs201219564 - - none none http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs201219564 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69270-69269\n1 69428 T G T PASS nonsynonymous_SNV exonic OR4F5 79501 D 0.891 676:44:15 74,1396 0.050340136054421766 43:0:2 313:16:10 88:7:3 6:0:0 44:8:0 102:9:0 102:4:2 nonsynonymous_SNV MODERATE D N N rs140739101 - - 14,3808 313,6535 http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs140739101 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69428-69427\n")),(0,r.kt)("h4",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"We parse the GME tsv file and extract the following 
columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"chrom"),(0,r.kt)("li",{parentName:"ul"},"pos"),(0,r.kt)("li",{parentName:"ul"},"ref"),(0,r.kt)("li",{parentName:"ul"},"alt"),(0,r.kt)("li",{parentName:"ul"},"filter"),(0,r.kt)("li",{parentName:"ul"},"GME_AC"),(0,r.kt)("li",{parentName:"ul"},"GME_AF")),(0,r.kt)("h2",{id:"grch37-liftover"},"GRCh37 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh38 on GME website. We performed a liftover from GRCh37 to GRCh38 using CrossMap."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"http://igm.ucsd.edu/gme/download.shtml"},"http://igm.ucsd.edu/gme/download.shtml")),(0,r.kt)("h2",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/07bac56e.fd2f6fea.js b/assets/js/07bac56e.fd2f6fea.js deleted file mode 100644 index b220964f..00000000 --- a/assets/js/07bac56e.fd2f6fea.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1342,8633],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>g});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},d=function(e){var t=c(e.components);return 
a.createElement(s.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},p=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),u=c(n),p=r,g=u["".concat(s,".").concat(p)]||u[p]||m[p]||o;return n?a.createElement(g,i(i({ref:t},d),{},{components:n})):a.createElement(g,i({ref:t},d))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=p;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:r,i[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/gme-json",id:"data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gme-json.md",tags:[],version:"current",frontMatter:{}},s=[],c={toc:s},d="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}u.isMDXComponent=!0},52812:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>i,metadata:()=>s,toc:()=>c});var a=n(87462),r=(n(67294),n(3905)),o=n(48036);const i={title:"GME Variome"},l=void 0,s={unversionedId:"data-sources/gme",id:"data-sources/gme",title:"GME Variome",description:"Overview",source:"@site/docs/data-sources/gme.mdx",sourceDirName:"data-sources",slug:"/data-sources/gme",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gme.mdx",tags:[],version:"current",frontMatter:{title:"GME 
Variome"},sidebar:"docs",previous:{title:"GERP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp"},next:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad"}},c=[{value:"Overview",id:"overview",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"GRCh37 liftover",id:"grch37-liftover",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON output",id:"json-output",children:[],level:2}],d={toc:c},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"http://igm.ucsd.edu/gme/index.php"},"Greater Middle East (GME) Variome")," Project is aimed at generating a coding base reference for the countries found in the Greater Middle East. Illumina Connected Annotations presents variant frequencies for the Greater Middle Eastern population."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Scott, E. M., Halees, A., Itan, Y., Spencer, E. G., He, Y., Azab, M. A., Gabriel, S. B., Belkadi, A., Boisson, B., Abel, L., Clark, A. G., Greater Middle East Variome Consortium, Alkuraya, F. 
S., Casanova, J. L., & Gleeson, J. G. (2016). Characterization of Greater Middle Eastern genetic variation for enhanced disease gene discovery. ",(0,r.kt)("em",{parentName:"p"},"Nature genetics"),", 48(9), 1071\u20131076. ",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/ng.3592"},"https://doi.org/10.1038/ng.3592")))),(0,r.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chrom pos ref alt AA filter FunctionGVS geneFunction Gene GeneID SIFT_pred GERP++ AF GME_GC GME_AC GME_AF NWA NEA AP Israel SD TP CA FunctionGVS_new Priority Polyphen2_HVAR_pred LRT_pred MutationTaster_pred rsid OMIM_MIM OMIM_Disease AA_AC EA_AC rsid_link position_link\n1 69134 A G A VQSRTrancheSNP99.90to100.00 nonsynonymous_SNV exonic OR4F5 79501 T 2.31 96:0:5 10,192 0.04950495049504951 4:0:0 59:0:2 12:0:0 0:0:0 6:0:0 9:0:2 13:0:2 nonsynonymous_SNV MODERATE B N N none - - none none - http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69134-69133\n1 69270 A G A PASS synonymous_SNV exonic OR4F5 79501 . . 93:38:240 518,224 0.6981132075471698 5:5:11 63:30:86 12:5:28 1:0:2 2:2:18 7:3:46 7:2:52 synonymous_SNV LOW . . . 
rs201219564 - - none none http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs201219564 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69270-69269\n1 69428 T G T PASS nonsynonymous_SNV exonic OR4F5 79501 D 0.891 676:44:15 74,1396 0.050340136054421766 43:0:2 313:16:10 88:7:3 6:0:0 44:8:0 102:9:0 102:4:2 nonsynonymous_SNV MODERATE D N N rs140739101 - - 14,3808 313,6535 http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs140739101 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69428-69427\n")),(0,r.kt)("h4",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"We parse the GME tsv file and extract the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"chrom"),(0,r.kt)("li",{parentName:"ul"},"pos"),(0,r.kt)("li",{parentName:"ul"},"ref"),(0,r.kt)("li",{parentName:"ul"},"alt"),(0,r.kt)("li",{parentName:"ul"},"filter"),(0,r.kt)("li",{parentName:"ul"},"GME_AC"),(0,r.kt)("li",{parentName:"ul"},"GME_AF")),(0,r.kt)("h2",{id:"grch37-liftover"},"GRCh37 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh38 on GME website. 
We performed a liftover from GRCh37 to GRCh38 using CrossMap."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"http://igm.ucsd.edu/gme/download.shtml"},"http://igm.ucsd.edu/gme/download.shtml")),(0,r.kt)("h2",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/08a089c6.6eaac4e1.js b/assets/js/08a089c6.6eaac4e1.js deleted file mode 100644 index 7fdb61f3..00000000 --- a/assets/js/08a089c6.6eaac4e1.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3957,5360,6635,6458],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>N});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},c=function(e){var t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),d=m(n),u=i,N=d["".concat(s,".").concat(u)]||d[u]||p[u]||r;return n?a.createElement(N,l(l({ref:t},c),{},{components:n})):a.createElement(N,l({ref:t},c))}));function N(e,t){var 
n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[d]="string"==typeof e?e:i,l[1]=o;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/cosmic-cancer-gene-census",id:"data-sources/cosmic-cancer-gene-census",title:"cosmic-cancer-gene-census",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-cancer-gene-census.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-cancer-gene-census",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-cancer-gene-census",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-cancer-gene-census.md",tags:[],version:"current",frontMatter:{}},s=[],m={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},' {\n "name": "PRDM16",\n "hgncId": 14000,\n "ncbiGeneId": "63976",\n "ensemblGeneId": "ENSG00000142611",\n "cosmic": {\n "roleInCancer": [\n "oncogene",\n "fusion"\n ]\n }\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"roleInCancer"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Possible roles in 
caner")))))}d.isMDXComponent=!0},7997:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/cosmic-gene-fusion-json",id:"data-sources/cosmic-gene-fusion-json",title:"cosmic-gene-fusion-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-gene-fusion-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-gene-fusion-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-gene-fusion-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-gene-fusion-json.md",tags:[],version:"current",frontMatter:{}},s=[],m={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},' "cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary gland (submandibular)",\n "numSamples":1\n },\n {\n "name":"salivary gland (parotid)",\n "numSamples":1\n },\n {\n "name":"salivary gland (nasal cavity)",\n "numSamples":1\n },\n {\n "name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 19841262\n ]\n }\n 
]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Count")),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"name"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"description")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})))))}d.isMDXComponent=!0},20525:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/cosmic-json",id:"data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-json.md",tags:[],version:"current",frontMatter:{}},s=[],m={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "id":"COSV58272668",\n "numSamples":8,\n "refAllele":"-",\n "altAllele":"CCT",\n "histologies":[\n {\n "name":"carcinoma (serous carcinoma)",\n "numSamples":2\n },\n {\n "name":"meningioma (fibroblastic)",\n "numSamples":1\n },\n {\n "name":"carcinoma",\n "numSamples":1\n },\n {\n "name":"carcinoma (squamous cell carcinoma)",\n 
"numSamples":1\n },\n {\n "name":"meningioma (transitional)",\n "numSamples":1\n },\n {\n "name":"carcinoma (adenocarcinoma)",\n "numSamples":1\n },\n {\n "name":"other (neoplasm)",\n "numSamples":1\n }\n ],\n "sites":[\n {\n "name":"ovary",\n "numSamples":2\n },\n {\n "name":"meninges",\n "numSamples":2\n },\n {\n "name":"thyroid",\n "numSamples":2\n },\n {\n "name":"cervix",\n "numSamples":1\n },\n {\n "name":"large intestine (colon)",\n "numSamples":1\n }\n ],\n "pubMedIds":[\n 25738363,\n 27548314\n ],\n "confirmedSomatic":true,\n "drugResistance":true, /* not in this particular COSMIC variant */\n "isAlleleSpecific":true\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"COSMIC Genomic Mutation ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypic 
descriptions")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"confirmedSomatic"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a confirmed somatic variant")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"drugResistance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the variant has been associated with drug resistance")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Count")),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"name"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"description")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})))))}d.isMDXComponent=!0},71335:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>m,default:()=>N,frontMatter:()=>s,metadata:()=>c,toc:()=>d});var a=n(87462),i=(n(67294),n(3905)),r=n(20525),l=n(7997),o=n(11273);const s={title:"COSMIC"},m=void 
0,c={unversionedId:"data-sources/cosmic",id:"data-sources/cosmic",title:"COSMIC",description:"Overview",source:"@site/docs/data-sources/cosmic.mdx",sourceDirName:"data-sources",slug:"/data-sources/cosmic",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic.mdx",tags:[],version:"current",frontMatter:{title:"COSMIC"},sidebar:"docs",previous:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar"},next:{title:"DANN",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF extraction",id:"vcf-extraction",children:[{value:"Example",id:"example",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4}],level:3},{value:"TSV extraction",id:"tsv-extraction",children:[{value:"Example",id:"example-1",children:[],level:4},{value:"Parsing",id:"parsing-1",children:[],level:4},{value:"Parsing",id:"parsing-2",children:[],level:4},{value:"Aggregating Histologies & Sites",id:"aggregating-histologies--sites",children:[],level:4}],level:3},{value:"Download URL",id:"download-url",children:[{value:"GRCh37",id:"grch37",children:[],level:4},{value:"GRCh38",id:"grch38",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2},{value:"Gene Fusions",id:"gene-fusions",children:[{value:"TSV extraction",id:"tsv-extraction-1",children:[{value:"Example",id:"example-2",children:[],level:4},{value:"Parsing",id:"parsing-3",children:[],level:4},{value:"Parsing",id:"parsing-4",children:[],level:4},{value:"Aggregating Histologies & Sites",id:"aggregating-histologies--sites-1",children:[],level:4}],level:3},{value:"Known Issues",id:"known-issues",children:[],level:3},{value:"Download 
URL",id:"download-url-1",children:[{value:"GRCh37",id:"grch37-1",children:[],level:4},{value:"GRCh38",id:"grch38-1",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output-1",children:[],level:3}],level:2},{value:"Cancer Gene Census",id:"cancer-gene-census",children:[{value:"TSV Extraction",id:"tsv-extraction-2",children:[{value:"Example",id:"example-3",children:[],level:4},{value:"Parsing",id:"parsing-5",children:[{value:"Columns",id:"columns",children:[],level:5},{value:"Possible Roles in Cancer",id:"possible-roles-in-cancer",children:[],level:5},{value:"Parsing Stats",id:"parsing-stats",children:[],level:5}],level:4}],level:3},{value:"Known Issues",id:"known-issues-1",children:[],level:3},{value:"Download URL",id:"download-url-2",children:[],level:3},{value:"JSON output",id:"json-output-2",children:[],level:3}],level:2}],p={toc:d},u="wrapper";function N(e){let{components:t,...n}=e;return(0,i.kt)(u,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"COSMIC, the Catalogue of Somatic Mutations in Cancer, is the world's largest source of expert manually curated somatic mutation information relating to human\ncancers."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"John G Tate, Sally Bamford, Harry C Jubb, Zbyslaw Sondka, David M Beare, Nidhi Bindal, 
Harry Boutselakis, Charlotte G Cole, Celestino Creatore, Elisabeth Dawson,\nPeter Fish, Bhavana Harsha, Charlie Hathaway, Steve C Jupe, Chai Yin Kok, Kate Noble, Laura Ponting, Christopher C Ramshaw, Claire E Rye, Helen E Speedy, Ray\nStefancsik, Sam L Thompson, Shicai Wang, Sari Ward, Peter J Campbell, Simon A Forbes. (2019) ",(0,i.kt)("a",{parentName:"p",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"COSMIC: the Catalogue Of Somatic Mutations In\nCancer"),", ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", Volume 47, Issue D1"))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Professional data source")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"This is a Professional data source and is not available freely. Please contact ",(0,i.kt)("a",{parentName:"p",href:"mailto:annotation_support@illumina.com"},"annotation_support@illumina.com")," if you would like to obtain it."))),(0,i.kt)("h2",{id:"small-variants"},"Small Variants"),(0,i.kt)("p",null,"Our main COSMIC deliverable provides annotations for both coding and non-coding variants throughout the genome. As of COSMIC v96, this includes 28.7M variants\nspanning the human genome. 
Illumina Connected Annotations currently parses four files to extract the relevant content:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"CosmicCodingMuts.vcf.gz"),(0,i.kt)("li",{parentName:"ul"},"CosmicNonCodingVariants.vcf.gz"),(0,i.kt)("li",{parentName:"ul"},"CosmicMutantExport.tsv.gz"),(0,i.kt)("li",{parentName:"ul"},"CosmicNCV.tsv.gz")),(0,i.kt)("h3",{id:"vcf-extraction"},"VCF extraction"),(0,i.kt)("h4",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 65797 COSV58737189 T C . . GENE=OR4F5_ENST00000641515;STRAND=+;LEGACY_ID=COSN23957695;CDS=c.9+224T>C;AA=p.?;HGVSC=ENST00000641515.2:c.9+224T>C;HGVSG=1:g.65797T>C;CNT=1\n")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the VCF files, we're mainly interested in the following columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"CHROM")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"POS")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"REF")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ALT"))),(0,i.kt)("h3",{id:"tsv-extraction"},"TSV extraction"),(0,i.kt)("h4",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"Gene name Accession Number Gene CDS length HGNC ID Sample name ID_sample ID_tumour Primary site Site subtype 1 Site subtype 2 Site subtype 3 Primary histology Histology subtype 1 Histology subtype 2 Histology subtype 3 Genome-wide screen GENOMIC_MUTATION_ID LEGACY_MUTATION_ID MUTATION_ID Mutation CDS Mutation AA Mutation Description Mutation zygosity LOH GRCh Mutation genome position Mutation strand Resistance Mutation Mutation somatic status Pubmed_PMID ID_STUDY Sample Type Tumour origin Age HGVSP HGVSC 
HGVSG\nMCF2L_ENST00000375604 ENST00000375604.6 3372 14576 RK091_C01 1918867 1806188 liver NS NS NS carcinoma NS NS NS y COSV65049364 COSN1601909 113108365 c.73+3096A>G p.? Unknown het 38 13:113005079-113005079 + - Variant of unknown origin 322 fresh/frozen - NOS primary ENST00000375604.6:c.73+3096A>G 13:g.113005079A>G\n")),(0,i.kt)("h4",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"From the TSV file, we're mainly interested in the following columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"GENOMIC_MUTATION_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ID_sample")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Primary site")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Site subtype 1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Primary histology")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Histology subtype 1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Pubmed_PMID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Resistance Mutation")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Mutation somatic status"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 
6H6v2h2v-2z"}))),"info")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"For all the histologies and sites, we replace all the underlines with spaces. ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary_gland")," would become ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary gland"),"."))),(0,i.kt)("h4",{id:"parsing-2"},"Parsing"),(0,i.kt)("p",null,"To aggregate the data in Illumina Connected Annotations, we perform the following:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Parse the coding and non-coding TSV files to retrieve the histologies, sites, PubMed IDs, somatic status, and resistance mutation status. Histologies and sites\nare tracked with respect to sample IDs."),(0,i.kt)("li",{parentName:"ul"},"Parse the coding and non-coding VCF files to retrieve the genomic variant for each entry")),(0,i.kt)("h4",{id:"aggregating-histologies--sites"},"Aggregating Histologies & Sites"),(0,i.kt)("p",null,"For sites and histologies, we observe that the subtype provides additional description but is still dependent on the primary site value. For example, the primary\nsite might be ",(0,i.kt)("inlineCode",{parentName:"p"},"skin"),", but the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"foot"),". Therefore, we will combine the values in the following manner: ",(0,i.kt)("inlineCode",{parentName:"p"},"skin (foot)"),". "),(0,i.kt)("p",null,"COSMIC uses ",(0,i.kt)("inlineCode",{parentName:"p"},"NS")," to show that a value is empty. 
If the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"NS"),", we will use the primary histology instead."),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("h4",{id:"grch37"},"GRCh37"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/VCF/CosmicCodingMuts.vcf.gz"},"CosmicCodingMuts.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/VCF/CosmicNonCodingVariants.vcf.gz"},"CosmicNonCodingVariants.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/CosmicMutantExport.tsv.gz"},"CosmicMutantExport.tsv.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/CosmicNCV.tsv.gz"},"CosmicNCV.tsv.gz"))),(0,i.kt)("h4",{id:"grch38"},"GRCh38"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/VCF/CosmicCodingMuts.vcf.gz"},"CosmicCodingMuts.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/VCF/CosmicNonCodingVariants.vcf.gz"},"CosmicNonCodingVariants.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/CosmicMutantExport.tsv.gz"},"CosmicMutantExport.tsv.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/CosmicNCV.tsv.gz"},"CosmicNCV.tsv.gz"))),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"SmallVariantJSON"}),(0,i.kt)("h2",{id:"gene-fusions"},"Gene Fusions"),(0,i.kt)("p",null,"Gene fusions are 
manually curated from peer reviewed publications by expert COSMIC curators. A comprehensive literature curation is completed for each fusion\npair when it is released in the database. Currently COSMIC includes information on fusions involved in solid tumours and leukaemias."),(0,i.kt)("h3",{id:"tsv-extraction-1"},"TSV extraction"),(0,i.kt)("h4",{id:"example-2"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"SAMPLE_ID SAMPLE_NAME PRIMARY_SITE SITE_SUBTYPE_1 SITE_SUBTYPE_2 SITE_SUBTYPE_3 PRIMARY_HISTOLOGY HISTOLOGY_SUBTYPE_1 HISTOLOGY_SUBTYPE_2 HISTOLOGY_SUBTYPE_3 FUSION_ID TRANSLOCATION_NAME 5'_CHROMOSOME 5'_STRAND 5'_GENE_ID 5'_GENE_NAME 5'_LAST_OBSERVED_EXON 5'_GENOME_START_FROM 5'_GENOME_START_TO 5'_GENOME_STOP_FROM 5'_GENOME_STOP_TO 3'_CHROMOSOME 3'_STRAND 3'_GENE_ID 3'_GENE_NAME 3'_FIRST_OBSERVED_EXON 3'_GENOME_START_FROM 3'_GENOME_START_TO 3'_GENOME_STOP_FROM 3'_GENOME_STOP_TO FUSION_TYPE PUBMED_PMID\n749711 HCC1187 breast NS NS NS carcinoma ductal_carcinoma NS NS 665 ENST00000360863.10(RGS22):r.1_3555::ENST00000369518.1(SYCP1):r.2100_3452 8 - 197199 RGS22 22 99981937 99981937 100106116 100106116 1 + 212470 SYCP1_ENST00000369518 24 114944339 114944339 114995367 114995367 Inferred Breakpoint 20033038\n")),(0,i.kt)("h4",{id:"parsing-3"},"Parsing"),(0,i.kt)("p",null,"From the TSV file, we're mainly interested in the following 
columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"SAMPLE_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_SITE")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_HISTOLOGY")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"HISTOLOGY_SUBTYPE_1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"FUSION_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"TRANSLOCATION_NAME")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PUBMED_PMID"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"For all the histologies and sites, we replace all the underlines with spaces. 
",(0,i.kt)("inlineCode",{parentName:"p"},"salivary_gland")," would become ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary gland"),"."))),(0,i.kt)("h4",{id:"parsing-4"},"Parsing"),(0,i.kt)("p",null,"To create the gene fusion entries in Illumina Connected Annotations, we perform the following on each row in the TSV file:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Group all entries by FUSION_ID"),(0,i.kt)("li",{parentName:"ul"},"Using all the entries related to this FUSION_ID:",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"Collect all the PubMed IDs"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of observed sample IDs"),(0,i.kt)("li",{parentName:"ul"},"Grab the HGVS r. notation (should not change throughout the FUSION_ID)"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each histology"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each site"))),(0,i.kt)("li",{parentName:"ul"},"Extract the transcript IDs from the HGVS notation and lookup the associated gene symbols")),(0,i.kt)("h4",{id:"aggregating-histologies--sites-1"},"Aggregating Histologies & Sites"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"#aggregating-histologies--sites"},"Aggregating Histologies & Sites")," was previously described in the small variants section."),(0,i.kt)("h3",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 
11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"There are some issues with the HGVS RNA notation:"),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusions.")))),(0,i.kt)("h3",{id:"download-url-1"},"Download URL"),(0,i.kt)("h4",{id:"grch37-1"},"GRCh37"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/CosmicFusionExport.tsv.gz"},"CosmicFusionExport.tsv.gz"))),(0,i.kt)("h4",{id:"grch38-1"},"GRCh38"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/CosmicFusionExport.tsv.gz"},"CosmicFusionExport.tsv.gz"))),(0,i.kt)("h3",{id:"json-output-1"},"JSON Output"),(0,i.kt)(l.default,{mdxType:"GeneFusionJSON"}),(0,i.kt)("h2",{id:"cancer-gene-census"},"Cancer Gene Census"),(0,i.kt)("h3",{id:"tsv-extraction-2"},"TSV Extraction"),(0,i.kt)("h4",{id:"example-3"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"GENE_NAME CELL_TYPE PUBMED_PMID HALLMARK IMPACT DESCRIPTION CELL_LINE\nPRDM16 18496560 role in cancer oncogene oncogene\nPRDM16 16015645 role in cancer fusion fusion\n")),(0,i.kt)("h4",{id:"parsing-5"},"Parsing"),(0,i.kt)("p",null,'To extract information about TSGs and oncogenes, the data based on the "role in cancer" attribute is filtered.\nFor tumor suppressor genes, rows with the value "TSG" and for oncogenes, rows with the value "oncogene" are filtered.\nSome genes have both "TSG/oncogene" as their role, which indicates that they can act as both.'),(0,i.kt)("h5",{id:"columns"},"Columns"),(0,i.kt)("p",null,"Only following columns are needed to 
gather required roles in cancer:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"GENE_NAME")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"IMPACT")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"HALLMARK"))),(0,i.kt)("h5",{id:"possible-roles-in-cancer"},"Possible Roles in Cancer"),(0,i.kt)("p",null,"While parsing, only following roles in cancer are found:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"fusion")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"TSG")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"oncogene"))),(0,i.kt)("h5",{id:"parsing-stats"},"Parsing Stats"),(0,i.kt)("p",null,"The file contained following number of instances for each role type"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Role in cancer"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Total Instances"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"fusion"),(0,i.kt)("td",{parentName:"tr",align:"center"},"149")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"TSG"),(0,i.kt)("td",{parentName:"tr",align:"center"},"195")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"oncogene"),(0,i.kt)("td",{parentName:"tr",align:"center"},"181")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"Total"),(0,i.kt)("td",{parentName:"tr",align:"center"},"525")))),(0,i.kt)("h3",{id:"known-issues-1"},"Known Issues"),(0,i.kt)("p",null,"None"),(0,i.kt)("h3",{id:"download-url-2"},"Download 
URL"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v97/Cancer_Gene_Census_Hallmarks_Of_Cancer.tsv.gz"},"Cancer_Gene_Census_Hallmarks_Of_Cancer.tsv.gz"))),(0,i.kt)("h3",{id:"json-output-2"},"JSON output"),(0,i.kt)(o.default,{mdxType:"CancerGeneCensusJSON"}))}N.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/08a089c6.72bdf954.js b/assets/js/08a089c6.72bdf954.js new file mode 100644 index 00000000..ff02df30 --- /dev/null +++ b/assets/js/08a089c6.72bdf954.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3957,5360,6635,6458],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>N});var a=n(7294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},c=function(e){var t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),d=m(n),u=i,N=d["".concat(s,".").concat(u)]||d[u]||p[u]||r;return n?a.createElement(N,l(l({ref:t},c),{},{components:n})):a.createElement(N,l({ref:t},c))}));function N(e,t){var 
n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[d]="string"==typeof e?e:i,l[1]=o;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(7462),i=(n(7294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/cosmic-cancer-gene-census",id:"data-sources/cosmic-cancer-gene-census",title:"cosmic-cancer-gene-census",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-cancer-gene-census.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-cancer-gene-census",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-cancer-gene-census",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-cancer-gene-census.md",tags:[],version:"current",frontMatter:{}},s=[],m={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},' {\n "name": "PRDM16",\n "hgncId": 14000,\n "ncbiGeneId": "63976",\n "ensemblGeneId": "ENSG00000142611",\n "cosmic": {\n "roleInCancer": [\n "oncogene",\n "fusion"\n ]\n }\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"roleInCancer"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Possible roles in 
caner")))))}d.isMDXComponent=!0},7997:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(7462),i=(n(7294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/cosmic-gene-fusion-json",id:"data-sources/cosmic-gene-fusion-json",title:"cosmic-gene-fusion-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-gene-fusion-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-gene-fusion-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-gene-fusion-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-gene-fusion-json.md",tags:[],version:"current",frontMatter:{}},s=[],m={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},' "cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary gland (submandibular)",\n "numSamples":1\n },\n {\n "name":"salivary gland (parotid)",\n "numSamples":1\n },\n {\n "name":"salivary gland (nasal cavity)",\n "numSamples":1\n },\n {\n "name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 19841262\n ]\n }\n 
]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Count")),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"name"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"description")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})))))}d.isMDXComponent=!0},525:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(7462),i=(n(7294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/cosmic-json",id:"data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-json.md",tags:[],version:"current",frontMatter:{}},s=[],m={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "id":"COSV58272668",\n "numSamples":8,\n "refAllele":"-",\n "altAllele":"CCT",\n "histologies":[\n {\n "name":"carcinoma (serous carcinoma)",\n "numSamples":2\n },\n {\n "name":"meningioma (fibroblastic)",\n "numSamples":1\n },\n {\n "name":"carcinoma",\n "numSamples":1\n },\n {\n "name":"carcinoma (squamous cell carcinoma)",\n 
"numSamples":1\n },\n {\n "name":"meningioma (transitional)",\n "numSamples":1\n },\n {\n "name":"carcinoma (adenocarcinoma)",\n "numSamples":1\n },\n {\n "name":"other (neoplasm)",\n "numSamples":1\n }\n ],\n "sites":[\n {\n "name":"ovary",\n "numSamples":2\n },\n {\n "name":"meninges",\n "numSamples":2\n },\n {\n "name":"thyroid",\n "numSamples":2\n },\n {\n "name":"cervix",\n "numSamples":1\n },\n {\n "name":"large intestine (colon)",\n "numSamples":1\n }\n ],\n "pubMedIds":[\n 25738363,\n 27548314\n ],\n "confirmedSomatic":true,\n "drugResistance":true, /* not in this particular COSMIC variant */\n "isAlleleSpecific":true\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"COSMIC Genomic Mutation ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypic 
descriptions")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"confirmedSomatic"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a confirmed somatic variant")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"drugResistance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the variant has been associated with drug resistance")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Count")),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"name"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"description")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})))))}d.isMDXComponent=!0},1335:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>m,default:()=>N,frontMatter:()=>s,metadata:()=>c,toc:()=>d});var a=n(7462),i=(n(7294),n(3905)),r=n(525),l=n(7997),o=n(1273);const s={title:"COSMIC"},m=void 
0,c={unversionedId:"data-sources/cosmic",id:"data-sources/cosmic",title:"COSMIC",description:"Overview",source:"@site/docs/data-sources/cosmic.mdx",sourceDirName:"data-sources",slug:"/data-sources/cosmic",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic.mdx",tags:[],version:"current",frontMatter:{title:"COSMIC"},sidebar:"docs",previous:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar"},next:{title:"DANN",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF extraction",id:"vcf-extraction",children:[{value:"Example",id:"example",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4}],level:3},{value:"TSV extraction",id:"tsv-extraction",children:[{value:"Example",id:"example-1",children:[],level:4},{value:"Parsing",id:"parsing-1",children:[],level:4},{value:"Parsing",id:"parsing-2",children:[],level:4},{value:"Aggregating Histologies & Sites",id:"aggregating-histologies--sites",children:[],level:4}],level:3},{value:"Download URL",id:"download-url",children:[{value:"GRCh37",id:"grch37",children:[],level:4},{value:"GRCh38",id:"grch38",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2},{value:"Gene Fusions",id:"gene-fusions",children:[{value:"TSV extraction",id:"tsv-extraction-1",children:[{value:"Example",id:"example-2",children:[],level:4},{value:"Parsing",id:"parsing-3",children:[],level:4},{value:"Parsing",id:"parsing-4",children:[],level:4},{value:"Aggregating Histologies & Sites",id:"aggregating-histologies--sites-1",children:[],level:4}],level:3},{value:"Known Issues",id:"known-issues",children:[],level:3},{value:"Download 
URL",id:"download-url-1",children:[{value:"GRCh37",id:"grch37-1",children:[],level:4},{value:"GRCh38",id:"grch38-1",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output-1",children:[],level:3}],level:2},{value:"Cancer Gene Census",id:"cancer-gene-census",children:[{value:"TSV Extraction",id:"tsv-extraction-2",children:[{value:"Example",id:"example-3",children:[],level:4},{value:"Parsing",id:"parsing-5",children:[{value:"Columns",id:"columns",children:[],level:5},{value:"Possible Roles in Cancer",id:"possible-roles-in-cancer",children:[],level:5},{value:"Parsing Stats",id:"parsing-stats",children:[],level:5}],level:4}],level:3},{value:"Known Issues",id:"known-issues-1",children:[],level:3},{value:"Download URL",id:"download-url-2",children:[],level:3},{value:"JSON output",id:"json-output-2",children:[],level:3}],level:2}],p={toc:d},u="wrapper";function N(e){let{components:t,...n}=e;return(0,i.kt)(u,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"COSMIC, the Catalogue of Somatic Mutations in Cancer, is the world's largest source of expert manually curated somatic mutation information relating to human\ncancers."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"John G Tate, Sally Bamford, Harry C Jubb, Zbyslaw Sondka, David M Beare, Nidhi Bindal, 
Harry Boutselakis, Charlotte G Cole, Celestino Creatore, Elisabeth Dawson,\nPeter Fish, Bhavana Harsha, Charlie Hathaway, Steve C Jupe, Chai Yin Kok, Kate Noble, Laura Ponting, Christopher C Ramshaw, Claire E Rye, Helen E Speedy, Ray\nStefancsik, Sam L Thompson, Shicai Wang, Sari Ward, Peter J Campbell, Simon A Forbes. (2019) ",(0,i.kt)("a",{parentName:"p",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"COSMIC: the Catalogue Of Somatic Mutations In\nCancer"),", ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", Volume 47, Issue D1"))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Professional data source")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"This is a Professional data source and is not available freely. Please contact ",(0,i.kt)("a",{parentName:"p",href:"mailto:annotation_support@illumina.com"},"annotation_support@illumina.com")," if you would like to obtain it."))),(0,i.kt)("h2",{id:"small-variants"},"Small Variants"),(0,i.kt)("p",null,"Our main COSMIC deliverable provides annotations for both coding and non-coding variants throughout the genome. As of COSMIC v96, this includes 28.7M variants\nspanning the human genome. 
Illumina Connected Annotations currently parses four files to extract the relevant content:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"CosmicCodingMuts.vcf.gz"),(0,i.kt)("li",{parentName:"ul"},"CosmicNonCodingVariants.vcf.gz"),(0,i.kt)("li",{parentName:"ul"},"CosmicMutantExport.tsv.gz"),(0,i.kt)("li",{parentName:"ul"},"CosmicNCV.tsv.gz")),(0,i.kt)("h3",{id:"vcf-extraction"},"VCF extraction"),(0,i.kt)("h4",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 65797 COSV58737189 T C . . GENE=OR4F5_ENST00000641515;STRAND=+;LEGACY_ID=COSN23957695;CDS=c.9+224T>C;AA=p.?;HGVSC=ENST00000641515.2:c.9+224T>C;HGVSG=1:g.65797T>C;CNT=1\n")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the VCF files, we're mainly interested in the following columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"CHROM")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"POS")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"REF")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ALT"))),(0,i.kt)("h3",{id:"tsv-extraction"},"TSV extraction"),(0,i.kt)("h4",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"Gene name Accession Number Gene CDS length HGNC ID Sample name ID_sample ID_tumour Primary site Site subtype 1 Site subtype 2 Site subtype 3 Primary histology Histology subtype 1 Histology subtype 2 Histology subtype 3 Genome-wide screen GENOMIC_MUTATION_ID LEGACY_MUTATION_ID MUTATION_ID Mutation CDS Mutation AA Mutation Description Mutation zygosity LOH GRCh Mutation genome position Mutation strand Resistance Mutation Mutation somatic status Pubmed_PMID ID_STUDY Sample Type Tumour origin Age HGVSP HGVSC 
HGVSG\nMCF2L_ENST00000375604 ENST00000375604.6 3372 14576 RK091_C01 1918867 1806188 liver NS NS NS carcinoma NS NS NS y COSV65049364 COSN1601909 113108365 c.73+3096A>G p.? Unknown het 38 13:113005079-113005079 + - Variant of unknown origin 322 fresh/frozen - NOS primary ENST00000375604.6:c.73+3096A>G 13:g.113005079A>G\n")),(0,i.kt)("h4",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"From the TSV file, we're mainly interested in the following columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"GENOMIC_MUTATION_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ID_sample")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Primary site")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Site subtype 1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Primary histology")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Histology subtype 1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Pubmed_PMID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Resistance Mutation")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Mutation somatic status"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 
6H6v2h2v-2z"}))),"info")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"For all the histologies and sites, we replace all the underlines with spaces. ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary_gland")," would become ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary gland"),"."))),(0,i.kt)("h4",{id:"parsing-2"},"Parsing"),(0,i.kt)("p",null,"To aggregate the data in Illumina Connected Annotations, we perform the following:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Parse the coding and non-coding TSV files to retrieve the histologies, sites, PubMed IDs, somatic status, and resistance mutation status. Histologies and sites\nare tracked with respect to sample IDs."),(0,i.kt)("li",{parentName:"ul"},"Parse the coding and non-coding VCF files to retrieve the genomic variant for each entry")),(0,i.kt)("h4",{id:"aggregating-histologies--sites"},"Aggregating Histologies & Sites"),(0,i.kt)("p",null,"For sites and histologies, we observe that the subtype provides additional description but is still dependent on the primary site value. For example, the primary\nsite might be ",(0,i.kt)("inlineCode",{parentName:"p"},"skin"),", but the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"foot"),". Therefore, we will combine the values in the following manner: ",(0,i.kt)("inlineCode",{parentName:"p"},"skin (foot)"),". "),(0,i.kt)("p",null,"COSMIC uses ",(0,i.kt)("inlineCode",{parentName:"p"},"NS")," to show that a value is empty. 
If the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"NS"),", we will use the primary histology instead."),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("h4",{id:"grch37"},"GRCh37"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/VCF/CosmicCodingMuts.vcf.gz"},"CosmicCodingMuts.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/VCF/CosmicNonCodingVariants.vcf.gz"},"CosmicNonCodingVariants.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/CosmicMutantExport.tsv.gz"},"CosmicMutantExport.tsv.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/CosmicNCV.tsv.gz"},"CosmicNCV.tsv.gz"))),(0,i.kt)("h4",{id:"grch38"},"GRCh38"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/VCF/CosmicCodingMuts.vcf.gz"},"CosmicCodingMuts.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/VCF/CosmicNonCodingVariants.vcf.gz"},"CosmicNonCodingVariants.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/CosmicMutantExport.tsv.gz"},"CosmicMutantExport.tsv.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/CosmicNCV.tsv.gz"},"CosmicNCV.tsv.gz"))),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"SmallVariantJSON"}),(0,i.kt)("h2",{id:"gene-fusions"},"Gene Fusions"),(0,i.kt)("p",null,"Gene fusions are 
manually curated from peer reviewed publications by expert COSMIC curators. A comprehensive literature curation is completed for each fusion\npair when it is released in the database. Currently COSMIC includes information on fusions involved in solid tumours and leukaemias."),(0,i.kt)("h3",{id:"tsv-extraction-1"},"TSV extraction"),(0,i.kt)("h4",{id:"example-2"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"SAMPLE_ID SAMPLE_NAME PRIMARY_SITE SITE_SUBTYPE_1 SITE_SUBTYPE_2 SITE_SUBTYPE_3 PRIMARY_HISTOLOGY HISTOLOGY_SUBTYPE_1 HISTOLOGY_SUBTYPE_2 HISTOLOGY_SUBTYPE_3 FUSION_ID TRANSLOCATION_NAME 5'_CHROMOSOME 5'_STRAND 5'_GENE_ID 5'_GENE_NAME 5'_LAST_OBSERVED_EXON 5'_GENOME_START_FROM 5'_GENOME_START_TO 5'_GENOME_STOP_FROM 5'_GENOME_STOP_TO 3'_CHROMOSOME 3'_STRAND 3'_GENE_ID 3'_GENE_NAME 3'_FIRST_OBSERVED_EXON 3'_GENOME_START_FROM 3'_GENOME_START_TO 3'_GENOME_STOP_FROM 3'_GENOME_STOP_TO FUSION_TYPE PUBMED_PMID\n749711 HCC1187 breast NS NS NS carcinoma ductal_carcinoma NS NS 665 ENST00000360863.10(RGS22):r.1_3555::ENST00000369518.1(SYCP1):r.2100_3452 8 - 197199 RGS22 22 99981937 99981937 100106116 100106116 1 + 212470 SYCP1_ENST00000369518 24 114944339 114944339 114995367 114995367 Inferred Breakpoint 20033038\n")),(0,i.kt)("h4",{id:"parsing-3"},"Parsing"),(0,i.kt)("p",null,"From the TSV file, we're mainly interested in the following 
columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"SAMPLE_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_SITE")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_HISTOLOGY")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"HISTOLOGY_SUBTYPE_1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"FUSION_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"TRANSLOCATION_NAME")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PUBMED_PMID"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"For all the histologies and sites, we replace all the underlines with spaces. 
",(0,i.kt)("inlineCode",{parentName:"p"},"salivary_gland")," would become ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary gland"),"."))),(0,i.kt)("h4",{id:"parsing-4"},"Parsing"),(0,i.kt)("p",null,"To create the gene fusion entries in Illumina Connected Annotations, we perform the following on each row in the TSV file:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Group all entries by FUSION_ID"),(0,i.kt)("li",{parentName:"ul"},"Using all the entries related to this FUSION_ID:",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"Collect all the PubMed IDs"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of observed sample IDs"),(0,i.kt)("li",{parentName:"ul"},"Grab the HGVS r. notation (should not change throughout the FUSION_ID)"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each histology"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each site"))),(0,i.kt)("li",{parentName:"ul"},"Extract the transcript IDs from the HGVS notation and lookup the associated gene symbols")),(0,i.kt)("h4",{id:"aggregating-histologies--sites-1"},"Aggregating Histologies & Sites"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"#aggregating-histologies--sites"},"Aggregating Histologies & Sites")," was previously described in the small variants section."),(0,i.kt)("h3",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 
11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"There are some issues with the HGVS RNA notation:"),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusions.")))),(0,i.kt)("h3",{id:"download-url-1"},"Download URL"),(0,i.kt)("h4",{id:"grch37-1"},"GRCh37"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/CosmicFusionExport.tsv.gz"},"CosmicFusionExport.tsv.gz"))),(0,i.kt)("h4",{id:"grch38-1"},"GRCh38"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/CosmicFusionExport.tsv.gz"},"CosmicFusionExport.tsv.gz"))),(0,i.kt)("h3",{id:"json-output-1"},"JSON Output"),(0,i.kt)(l.default,{mdxType:"GeneFusionJSON"}),(0,i.kt)("h2",{id:"cancer-gene-census"},"Cancer Gene Census"),(0,i.kt)("h3",{id:"tsv-extraction-2"},"TSV Extraction"),(0,i.kt)("h4",{id:"example-3"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"GENE_NAME CELL_TYPE PUBMED_PMID HALLMARK IMPACT DESCRIPTION CELL_LINE\nPRDM16 18496560 role in cancer oncogene oncogene\nPRDM16 16015645 role in cancer fusion fusion\n")),(0,i.kt)("h4",{id:"parsing-5"},"Parsing"),(0,i.kt)("p",null,'To extract information about TSGs and oncogenes, the data based on the "role in cancer" attribute is filtered.\nFor tumor suppressor genes, rows with the value "TSG" and for oncogenes, rows with the value "oncogene" are filtered.\nSome genes have both "TSG/oncogene" as their role, which indicates that they can act as both.'),(0,i.kt)("h5",{id:"columns"},"Columns"),(0,i.kt)("p",null,"Only following columns are needed to 
gather required roles in cancer:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"GENE_NAME")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"IMPACT")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"HALLMARK"))),(0,i.kt)("h5",{id:"possible-roles-in-cancer"},"Possible Roles in Cancer"),(0,i.kt)("p",null,"While parsing, only following roles in cancer are found:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"fusion")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"TSG")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"oncogene"))),(0,i.kt)("h5",{id:"parsing-stats"},"Parsing Stats"),(0,i.kt)("p",null,"The file contained following number of instances for each role type"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Role in cancer"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Total Instances"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"fusion"),(0,i.kt)("td",{parentName:"tr",align:"center"},"149")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"TSG"),(0,i.kt)("td",{parentName:"tr",align:"center"},"195")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"oncogene"),(0,i.kt)("td",{parentName:"tr",align:"center"},"181")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"Total"),(0,i.kt)("td",{parentName:"tr",align:"center"},"525")))),(0,i.kt)("h3",{id:"known-issues-1"},"Known Issues"),(0,i.kt)("p",null,"None"),(0,i.kt)("h3",{id:"download-url-2"},"Download 
URL"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v97/Cancer_Gene_Census_Hallmarks_Of_Cancer.tsv.gz"},"Cancer_Gene_Census_Hallmarks_Of_Cancer.tsv.gz"))),(0,i.kt)("h3",{id:"json-output-2"},"JSON output"),(0,i.kt)(o.default,{mdxType:"CancerGeneCensusJSON"}))}N.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/08a0df5e.6ab6932a.js b/assets/js/08a0df5e.6ab6932a.js deleted file mode 100644 index e94e5973..00000000 --- a/assets/js/08a0df5e.6ab6932a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[384],{3905:(t,e,n)=>{n.d(e,{Zo:()=>d,kt:()=>k});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),c=function(t){var e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},d=function(t){var e=c(t.components);return r.createElement(p.Provider,{value:e},t.children)},m="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},s=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,i=t.originalType,p=t.parentName,d=l(t,["components","mdxType","originalType","parentName"]),m=c(n),s=a,k=m["".concat(p,".").concat(s)]||m[s]||u[s]||i;return n?r.createElement(k,o(o({ref:e},d),{},{components:n})):r.createElement(k,o({ref:e},d))}));function k(t,e){var 
n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var i=n.length,o=new Array(i);o[0]=s;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[m]="string"==typeof t?t:a,o[1]=l;for(var c=2;c{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const i={title:"Dependencies"},o=void 0,l={unversionedId:"introduction/dependencies",id:"version-3.2.5/introduction/dependencies",title:"Dependencies",description:"All of the following dependencies have been included in this repository.",source:"@site/versioned_docs/version-3.2.5/introduction/dependencies.md",sourceDirName:"introduction",slug:"/introduction/dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/introduction/dependencies",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/introduction/dependencies.md",tags:[],version:"3.2.5",frontMatter:{title:"Dependencies"},sidebar:"version-3.2.5/docs",previous:{title:"Introduction",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/"},next:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/introduction/getting-started"}},p=[],c={toc:p},d="wrapper";function m(t){let{components:e,...n}=t;return(0,a.kt)(d,(0,r.Z)({},c,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("p",null,"All of the following dependencies have been included in this 
repository."),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Name"),(0,a.kt)("th",{parentName:"tr",align:"center"},"License"),(0,a.kt)("th",{parentName:"tr",align:null},"Usage"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-sdk-net/"},"AWSSDK")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS Lambda, S3, SNS support")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://www.newtonsoft.com/json"},"Json.NET")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"JASIX utility")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/ebiggers/libdeflate"},"libdeflate")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/moq/moq4"},"Moq")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"Mocking framework for unit tests")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"http://www.ndesk.org/Options"},"NDesk.Options")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT/X11"),(0,a.kt)("td",{parentName:"tr",align:null},"CommandLine 
library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/xunit/xunit"},"xUnit")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"Unit testing framework")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/Dead2/zlib-ng"},"zlib-ng")),(0,a.kt)("td",{parentName:"tr",align:"center"},"zlib"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/facebook/zstd"},"zstd")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/09b8557a.e0e19723.js b/assets/js/09b8557a.e0e19723.js deleted file mode 100644 index 4361292b..00000000 --- a/assets/js/09b8557a.e0e19723.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9187],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var 
t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),m=d(n),h=i,u=m["".concat(s,".").concat(h)]||m[h]||c[h]||r;return n?a.createElement(u,o(o({ref:t},p),{},{components:n})):a.createElement(u,o({ref:t},p))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[m]="string"==typeof e?e:i,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={title:"Mitochondrial Heteroplasmy"},o=void 0,l={unversionedId:"data-sources/mito-heteroplasmy",id:"version-3.18/data-sources/mito-heteroplasmy",title:"Mitochondrial Heteroplasmy",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/mito-heteroplasmy.md",sourceDirName:"data-sources",slug:"/data-sources/mito-heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mito-heteroplasmy",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/mito-heteroplasmy.md",tags:[],version:"3.18",frontMatter:{title:"Mitochondrial Heteroplasmy"},sidebar:"docs",previous:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad"},next:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mitomap"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"JSON File",id:"json-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Binning VRF 
Data",id:"binning-vrf-data",children:[],level:4},{value:"Pre-processing the Data",id:"pre-processing-the-data",children:[],level:4},{value:"Algorithm",id:"algorithm",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline."),(0,i.kt)("h2",{id:"json-file"},"JSON File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "T:C":{\n "ad":[\n 1,\n 1,\n 1,\n 1,\n 1,\n 1\n ],\n "allele_type":"alt",\n "vrf":[\n 0.002369668246445498,\n 0.0024937655860349127,\n 0.0016129032258064516,\n 0.0025188916876574307,\n 0.0022935779816513763,\n 0.002008032128514056\n ],\n "vrf_stats":{\n "kurtosis":38.889891511122556,\n "max":0.0025188916876574307,\n "mean":5.4052190471990743e-05,\n "min":0.0,\n "nobs":246,\n "skewness":6.346664692283075,\n "stdev":0.0003461416264750575,\n "variance":1.1981402557879823e-07\n }\n }\n}\n\n')),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the JSON file, we're mainly interested in the following keys:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"variant")," (i.e. 
",(0,i.kt)("inlineCode",{parentName:"li"},"T:C"),")"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ad")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"vrf")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"nobs")," (number of observations)")),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Adjusting for null observations")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The ",(0,i.kt)("inlineCode",{parentName:"p"},"nobs")," value indicates how many observations were made. Ideally this would have been represented in the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," arrays, but it's left as an exercise for the reader."))),(0,i.kt)("h4",{id:"binning-vrf-data"},"Binning VRF Data"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," (variant read frequency) array in the JSON object above is paired with with the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," array (allele depths) shown above."),(0,i.kt)("p",null,"The data in the JSON object has a crazy number of significant digits. This means that as the number of samples increase, this array will grow. 
To make this more future-proof, Nirvana bins everything according to 0.1% increments."),(0,i.kt)("p",null,"With the binned data, we end up having 775 distinct ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143."),(0,i.kt)("h4",{id:"pre-processing-the-data"},"Pre-processing the Data"),(0,i.kt)("p",null,"The JSON file is converted into a small TSV file that is ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/blob/main/MitoHeteroplasmy/Resources/MitoHeteroplasmy.tsv.gz"},"embedded in Nirvana"),". Here is an example of the TSV file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS REF ALT VRF_BINS VRF_COUNTS\nchrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\nchrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\n")),(0,i.kt)("h4",{id:"algorithm"},"Algorithm"),(0,i.kt)("p",null,"Nirvana will calculate mitochondrial heteroplasmy data for every sample in the VCF. 
Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Percentiles")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana uses the ",(0,i.kt)("a",{parentName:"p",href:"https://en.wikipedia.org/wiki/Percentile"},"statistical definition of percentile")," (indicating the value below which a given percentage of observations in a group of observations falls). 
Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1)."))),(0,i.kt)("h2",{id:"download-url"},"Download URL"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unavailable")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The original data set is only available internally at Illumina at the moment."))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{14-17}","{14-17}":!0},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"one percentile for each variant frequency (each alternate allele)")))))}m.isMDXComponent=!0}}]); \ No newline at 
end of file diff --git a/assets/js/0ba7dc8d.f09e03c1.js b/assets/js/0ba7dc8d.f09e03c1.js deleted file mode 100644 index bbc72fc0..00000000 --- a/assets/js/0ba7dc8d.f09e03c1.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5471,5606,7454,9351],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>g});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,l=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),c=d(n),m=i,g=c["".concat(s,".").concat(m)]||c[m]||u[m]||l;return n?a.createElement(g,r(r({ref:t},p),{},{components:n})):a.createElement(g,r({ref:t},p))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var l=n.length,r=new Array(l);r[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[c]="string"==typeof e?e:i,r[1]=o;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 
0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.18/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clingen-dosage-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object 
array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}c.isMDXComponent=!0},95733:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.18/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n 
"classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,i.kt)("td",{parentName:"tr",align:null},"object"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"disease"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"disease label")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classification"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"classification")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no reported 
evidence"),(0,i.kt)("li",{parentName:"ul"},"disputed"),(0,i.kt)("li",{parentName:"ul"},"limited"),(0,i.kt)("li",{parentName:"ul"},"moderate"),(0,i.kt)("li",{parentName:"ul"},"definitive"),(0,i.kt)("li",{parentName:"ul"},"strong"),(0,i.kt)("li",{parentName:"ul"},"refuted"),(0,i.kt)("li",{parentName:"ul"},"no known disease relationship")))}c.isMDXComponent=!0},96177:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-json",id:"version-3.18/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clingen-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n 
"reciprocalOverlap":0.00254\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingen"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"variantType"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"id"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. 
Alternatively a VID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"validated"),(0,i.kt)("td",{parentName:"tr",align:null},"boolean"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")))}c.isMDXComponent=!0},18666:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>d,default:()=>g,frontMatter:()=>s,metadata:()=>p,toc:()=>c});var a=n(87462),i=(n(67294),n(3905)),l=n(96177),r=n(67769),o=n(95733);const s={title:"ClinGen"},d=void 0,p={unversionedId:"data-sources/clingen",id:"version-3.18/data-sources/clingen",title:"ClinGen",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/clingen.mdx",sourceDirName:"data-sources",slug:"/data-sources/clingen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clingen.mdx",tags:[],version:"3.18",frontMatter:{title:"ClinGen"},sidebar:"docs",previous:{title:"Amino Acid Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/amino-acid-conservation"},next:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clinvar"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"ISCA Regions",id:"isca-regions",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Status levels",id:"status-levels",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"Conflict Resolution",id:"conflict-resolution",children:[{value:"Clinical 
significance priority",id:"clinical-significance-priority",children:[],level:3},{value:"Validation Priority",id:"validation-priority",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2},{value:"Dosage Sensitivity Map",id:"dosage-sensitivity-map",children:[{value:"TSV Source files",id:"tsv-source-files",children:[],level:3},{value:"Dosage Rating System",id:"dosage-rating-system",children:[],level:3},{value:"Download URL",id:"download-url-1",children:[],level:3},{value:"JSON Output",id:"json-output-1",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[],level:3}],level:2},{value:"Gene-Disease Validity",id:"gene-disease-validity",children:[{value:"Source TSV",id:"source-tsv",children:[],level:3},{value:"Download URL",id:"download-url-2",children:[],level:3},{value:"Conflict Resolution",id:"conflict-resolution-1",children:[{value:"Multiple Classifications",id:"multiple-classifications",children:[],level:4},{value:"Multiple Dates",id:"multiple-dates",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output-2",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files-1",children:[],level:3}],level:2}],u={toc:c},m="wrapper";function g(e){let{components:t,...n}=e;return(0,i.kt)(m,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinGen is a National Institutes of Health (NIH)-funded resource dedicated to building a central resource that defines the clinical relevance of genes and variants for use in precision medicine and research."),(0,i.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Heidi L. Rehm, Ph.D., Jonathan S. Berg, M.D., Ph.D., Lisa D. Brooks, Ph.D., Carlos D. Bustamante, Ph.D., James P. Evans, M.D., Ph.D., Melissa J. Landrum, Ph.D., David H. Ledbetter, Ph.D., Donna R. Maglott, Ph.D., Christa Lese Martin, Ph.D., Robert L. Nussbaum, M.D., Sharon E. Plon, M.D., Ph.D., Erin M. Ramos, Ph.D., Stephen T. Sherry, Ph.D., and Michael S. Watson, Ph.D., for ClinGen. 
",(0,i.kt)("strong",{parentName:"p"},"ClinGen The Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"N Engl J Med 2015; 372:2235-2242 June 4, 2015 DOI: 10.1056/NEJMsr1406261.")))),(0,i.kt)("h2",{id:"isca-regions"},"ISCA Regions"),(0,i.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,i.kt)("p",null,"ClinGen contains only copy number variation variants, since the coordinates in ClinGen original file follow the same rule as BED format, the coordinates had to be adjusted to ","[BEGIN+1, END]","."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#bin chrom chromStart chromEnd name score strand thickStart thickEnd attrCount attrTags attrVals\nnsv530705 1 564405 8597804 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530706 1 564424 3262790 0 1 copy_number_loss pathogenic False Abnormal facial shape,Abnormality of cardiac morphology,Global developmental delay,Muscular hypotonia HP:0001252,HP:0001263,HP:0001627,HP:0001999,MedGen:CN001147,MedGen:CN001157,MedGen:CN001482,MedGen:CN001810\nnsv530707 1 564424 7068738 0 1 copy_number_loss pathogenic False Abnormality of cardiac morphology,Cleft upper lip,Failure to thrive,Global developmental delay,Intrauterine growth retardation,Microcephaly,Short stature HP:0000204,HP:0000252,HP:0001263,HP:0001508,HP:0001511,HP:0001627,HP:0004322,MedGen:C0349588,MedGen:C1845868,MedGen:C1853481,MedGen:C2364119,MedGen:CN000197,MedGen:CN001157,MedGen:CN001482\nnsv533512 1 564435 649748 0 1 copy_number_loss benign False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv931338 1 714078 4958499 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530300 1 728138 5066371 1 0 copy_number_gain pathogenic False Abnormality of cardiac morphology,Cleft palate,Global developmental delay 
HP:0000175,HP:0001263,HP:0001627,MedGen:C2240378,MedGen:CN001157,MedGen:CN001482\n")),(0,i.kt)("h4",{id:"status-levels"},"Status levels"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"We parse the ClinGen tsv file and extract the following:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"chrom"),(0,i.kt)("li",{parentName:"ul"},"chromStart (note this a 0-based coordinate)"),(0,i.kt)("li",{parentName:"ul"},"chromEnd"),(0,i.kt)("li",{parentName:"ul"},"attrTags"),(0,i.kt)("li",{parentName:"ul"},"attrVals")),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," are comma separated lists. ",(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," contains the field keys and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," contains the field values. 
We will parse the following keys from the two fields:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"parent (this will be used as the ID in our JSON output)"),(0,i.kt)("li",{parentName:"ul"},"clinical_int"),(0,i.kt)("li",{parentName:"ul"},"validated"),(0,i.kt)("li",{parentName:"ul"},"phenotype (this should be a string array)"),(0,i.kt)("li",{parentName:"ul"},"phenotype_id (this should be a string array)")),(0,i.kt)("p",null,"Observed losses and observed gains will be calculated from entries that share a common parent ID."),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"variants with a common parent ID and same coordinates are grouped",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"calculated observed losses, observed gains for each group"),(0,i.kt)("li",{parentName:"ul"},"Clinical significance and validation status are collapsed using the priority strategy described below"))),(0,i.kt)("li",{parentName:"ul"},"Variants with the same parent ID can have different coordinates (mapped to hg38)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"nsv491508 : chr14:105583663-106881350 and chr14:105605043-106766076 (only one example)"),(0,i.kt)("li",{parentName:"ul"},"we kept both variants")))),(0,i.kt)("h2",{id:"conflict-resolution"},"Conflict Resolution"),(0,i.kt)("h3",{id:"clinical-significance-priority"},"Clinical significance priority"),(0,i.kt)("p",null,"When there are a mixture of variants belonging to the same parent ID, we will choose the most pathogenic clinical significance from the available values. i.e. 
if 3 samples were deemed pathogenic and 2 samples were likely pathogenic, we would list the variant as pathogenic."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Priority")," (high to low)"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Priority"),(0,i.kt)("li",{parentName:"ul"},"Pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Benign"),(0,i.kt)("li",{parentName:"ul"},"Likely benign"),(0,i.kt)("li",{parentName:"ul"},"Uncertain significance")),(0,i.kt)("h3",{id:"validation-priority"},"Validation Priority"),(0,i.kt)("p",null,"When there are a mixture of variants belonging to same parent ID, we will set the validation status to true if any of the variants were validated."),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite"},"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite")),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(l.default,{mdxType:"CLINGENJSON"}),(0,i.kt)("h2",{id:"dosage-sensitivity-map"},"Dosage Sensitivity Map"),(0,i.kt)("p",null,"The Clinical Genome Resource (ClinGen) consortium is curating genes and regions of the genome to assess whether there is evidence to support that these genes/regions are dosage sensitive and should be targeted on a cytogenomic array. 
Nirvana reports these annotations for overlapping SVs."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Riggs ER, Nelson T, Merz A, Ackley T, Bunke B, Collins CD, Collinson MN, Fan YS, Goodenberger ML, Golden DM, Haglund-Hazy L, Krgovic D, Lamb AN, Lewis Z, Li G, Liu Y, Meck J, Neufeld-Kaiser W, Runke CK, Sanmann JN, Stavropoulos DJ, Strong E, Su M, Tayeh MK, Kokalj Vokac N, Thorland EC, Andersen E, Martin CL. ",(0,i.kt)("strong",{parentName:"p"},"Copy number variant discrepancy resolution using the ClinGen dosage sensitivity map results in updated clinical interpretations in ClinVar.")," ",(0,i.kt)("em",{parentName:"p"},"Hum Mutat. 2018 Nov;39(11):1650-1659. doi: 10.1002/humu.23610. 
PMID: 30095202; PMCID: PMC7374944.")))),(0,i.kt)("h3",{id:"tsv-source-files"},"TSV Source files"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Regions")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Region Curation Results\n#07 May,2019\n#Genomic Locations are reported on GRCh38 (hg38): GCF_000001405.36\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_region.cgi?id=key\n#ISCA ID ISCA Region Name cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nISCA-46299 Xp11.22 region (includes HUWE1) Xp11.22 tbd 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 22840365 20655035 26692240 2018-11-19\nISCA-46295 15q13.3 recurrent region (D-CHRNA7 to BP5) (includes CHRNA7 and OTUD7A) 15q13.3 chr15:31727418-32153204 3 Sufficient evidence for dosage pathogenicity 19898479 20236110 22775350 40 Dosage sensitivity unlikely 26968334 22420048 2018-05-10\nISCA-46291 7q11.23 recurrent distal region (includes HIP1, YWHAG) 7q11.23 chr7:75528718-76433859 2 Some evidence for dosage pathogenicity 21109226 16971481 1 Little evidence for dosage pathogenicity 21109226 27867344 2018-12-31\nISCA-46290 Xp11.22p11.23 recurrent region (includes SHROOM4) Xp11.22-p11.23 chrX: 48447780-52444264 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 19716111 21418194 25425167 2017-12-14 300801\n")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Genes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Gene Curation Results\n#24 May,2019\n#Genomic Locations are reported on GRCh37 
(hg19): GCF_000001405.13\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_gene.cgi?sym=Gene Symbol\n#Gene Symbol Gene ID cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nA4GALT 53947 22q13.2 chr22:43088121-43117307 30 Gene associated with autosomal recessive phenotype 0 No evidence available 2014-12-11 111400\nAAGAB 79719 15q23 chr15:67493013-67547536 3 Sufficient evidence for dosage pathogenicity 23064416 23000146 0 No evidence available 2013-02-28 148600\n")),(0,i.kt)("h3",{id:"dosage-rating-system"},"Dosage Rating System"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Rating"),(0,i.kt)("th",{parentName:"tr",align:null},"Possible Clinical Interpretation"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"0"),(0,i.kt)("td",{parentName:"tr",align:null},"No evidence to suggest that dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"1"),(0,i.kt)("td",{parentName:"tr",align:null},"Little evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"2"),(0,i.kt)("td",{parentName:"tr",align:null},"Emerging evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"3"),(0,i.kt)("td",{parentName:"tr",align:null},"Sufficient evidence suggesting 
dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"30"),(0,i.kt)("td",{parentName:"tr",align:null},"Gene associated with autosomal recessive phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"40"),(0,i.kt)("td",{parentName:"tr",align:null},"Dosage sensitivity unlikely")))),(0,i.kt)("p",null,"Reference: ",(0,i.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml"},"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml")),(0,i.kt)("h3",{id:"download-url-1"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.clinicalgenome.org/"},"ftp://ftp.clinicalgenome.org/")),(0,i.kt)("h3",{id:"json-output-1"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"ClinGenDosageJson"}),(0,i.kt)("h3",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The gene dosage sensitivity ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," for Nirvana can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DosageSensitivity")," subcommand. 
The required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"ClinGen_gene_curation_list_{ASSEMBLY}.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen Dosage Sensitivity Map\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll DosageSensitivity --out SupplementaryDatabase/64/GRCh37 --tsv ClinGen_gene_curation_list_GRCh37.tsv\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0\n---------------------------------------------------------------------------\n\n\nTime: 00:00:00.1\n")),(0,i.kt)("p",null,"For building the ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," files, we use the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DosageMapRegions")," subcommand. 
The required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"ClinGen_region_curation_list_{ASSEMBLY}.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen Dosage Sensitivity Map\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll DosageMapRegions --out SupplementaryDatabase/64/GRCh37 --ref References/7/Homo_sapiens.GRCh37.Nirvana.dat --tsv ClinGen_region_curation_list_GRCh37.tsv\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0\n---------------------------------------------------------------------------\n\nWriting 505 intervals to database...\n\nTime: 00:00:00.1\n")),(0,i.kt)("h2",{id:"gene-disease-validity"},"Gene-Disease Validity"),(0,i.kt)("p",null,"The ClinGen Gene-Disease Clinical Validity curation process involves evaluating the strength of evidence supporting or refuting a claim that variation in a particular gene causes a particular disease. 
Nirvana reports these annotations for genes in the genes section of the JSON."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Strande NT, Riggs ER, Buchanan AH, et al. ",(0,i.kt)("strong",{parentName:"p"},"Evaluating the Clinical Validity of Gene-Disease Associations: An Evidence-Based Framework Developed by the Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"Am J Hum Genet. 2017;100(6):895-906. 
doi:10.1016/j.ajhg.2017.04.015")))),(0,i.kt)("h3",{id:"source-tsv"},"Source TSV"),(0,i.kt)("p",null,"The source data comes in a CSV file that we convert to a TSV."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"CLINGEN GENE VALIDITY CURATIONS\nFILE CREATED: 2019-05-28\nWEBPAGE: https://search.clinicalgenome.org/kb/gene-validity\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nGENE SYMBOL,GENE ID (HGNC),DISEASE LABEL,DISEASE ID (MONDO),SOP,CLASSIFICATION,ONLINE REPORT,CLASSIFICATION DATE\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nA2ML1,HGNC:23336,Noonan syndrome with multiple lentigines,MONDO_0007893,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/59b87033-dd91-4f1e-aec1-c9b1f5124b16--2018-06-07T14:37:47,2018-06-07T14:37:47.175Z\nA2ML1,HGNC:23336,cardiofaciocutaneous syndrome,MONDO_0015280,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/fc3c41d8-8497-489b-a350-c9e30016bc6a--2018-06-07T14:31:03,2018-06-07T14:31:03.696Z\nA2ML1,HGNC:23336,Costello syndrome,MONDO_0009026,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/ea72ba8d-cf62-44bc-86be-da64e3848eba--2018-06-07T14:34:05,2018-06-07T14:34:05.324Z\n")),(0,i.kt)("h3",{id:"download-url-2"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://search.clinicalgenome.org/kb/downloads#section_gene-disease-validity"},"https://search.clinicalgenome.org/kb/downloads#section_gene-disease-validity")),(0,i.kt)("h3",{id:"conflict-resolution-1"},"Conflict Resolution"),(0,i.kt)("h4",{id:"multiple-classifications"},"Multiple Classifications"),(0,i.kt)("p",null,"Here is an example of multiple classifications."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0010192 
ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep EDNRB\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Moderate,https://search.clinicalgenome.org/kb/gene-validity/d7abbd45-7915-437b-849b-dea876bfc2f5--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Limited,https://search.clinicalgenome.org/kb/gene-validity/73ee9727-60c1-40fd-830f-08c2b513d2ee--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\n")),(0,i.kt)("p",null,"In such cases, we select the more severe classification."),(0,i.kt)("h4",{id:"multiple-dates"},"Multiple Dates"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0016419 ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep MUTYH\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9904,2017-05-24T00:00:00\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9902,2017-05-25T00:00:00\n")),(0,i.kt)("p",null,"If the classifications are the same, we should select the latest classification date."),(0,i.kt)("h3",{id:"json-output-2"},"JSON Output"),(0,i.kt)(o.default,{mdxType:"ClinGenGeneValidity"}),(0,i.kt)("h3",{id:"building-the-supplementary-files-1"},"Building the supplementary files"),(0,i.kt)("p",null,"The gene disease validity ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," for Nirvana can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DiseaseValidity")," subcommand. 
The only required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"Clingen-Gene-Disease-Summary-2021-12-01.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen disease validity curations\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Disease validity curations from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll DiseaseValidity --tsv Clingen-Gene-Disease-Summary-2021-12-01.tsv \\\\\n--uga Cache/27/UGA.tsv.gz --out SupplementaryDatabase/64/GRCh37\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0\n---------------------------------------------------------------------------\n\nNumber of geneIds missing from the cache:0 (0%)\n\nTime: 00:00:00.2\n")))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0bd2af6a.997f90ad.js b/assets/js/0bd2af6a.997f90ad.js deleted file mode 100644 index f4bf9cae..00000000 --- a/assets/js/0bd2af6a.997f90ad.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5160],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var 
l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),s=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=s(t.components);return a.createElement(p.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),c=s(n),u=r,f=c["".concat(p,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(f,o(o({ref:e},m),{},{components:n})):a.createElement(f,o({ref:e},m))}));function f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=u;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[c]="string"==typeof t?t:r,o[1]=i;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/mitomap-small-variants-json",id:"data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-small-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],s={toc:p},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n 
"altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record 
status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0bd2af6a.e4002a39.js b/assets/js/0bd2af6a.e4002a39.js new file mode 100644 index 00000000..5220df66 --- /dev/null +++ b/assets/js/0bd2af6a.e4002a39.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5160],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>f});var a=n(7294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var 
e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),s=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=s(t.components);return a.createElement(p.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),c=s(n),u=r,f=c["".concat(p,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(f,o(o({ref:e},m),{},{components:n})):a.createElement(f,o({ref:e},m))}));function f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=u;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[c]="string"==typeof t?t:r,o[1]=i;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(7462),r=(n(7294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/mitomap-small-variants-json",id:"data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-small-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],s={toc:p},m="wrapper";function 
c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated 
diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0be5de6c.79641df9.js b/assets/js/0be5de6c.79641df9.js new file mode 100644 index 
00000000..a225495d --- /dev/null +++ b/assets/js/0be5de6c.79641df9.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1912],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>g});var r=n(7294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),u=function(t){var e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=u(t.components);return r.createElement(p.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},s=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,l=t.originalType,p=t.parentName,c=i(t,["components","mdxType","originalType","parentName"]),d=u(n),s=a,g=d["".concat(p,".").concat(s)]||d[s]||m[s]||l;return n?r.createElement(g,o(o({ref:e},c),{},{components:n})):r.createElement(g,o({ref:e},c))}));function g(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[d]="string"==typeof t?t:a,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var r=n(7462),a=(n(7294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/decipher-json",id:"data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/decipher-json.md",tags:[],version:"current",frontMatter:{}},p=[],u={toc:p},c="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n }\n],\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"begin"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"end"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"# of observed 
deletions")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"total # of samples")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% annotation overlap")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0be5de6c.8979d7bf.js b/assets/js/0be5de6c.8979d7bf.js deleted file mode 100644 index d822f62c..00000000 --- a/assets/js/0be5de6c.8979d7bf.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1912],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>g});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),u=function(t){var e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=u(t.components);return r.createElement(p.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},s=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,l=t.originalType,p=t.parentName,c=i(t,["components","mdxType","originalType","parentName"]),d=u(n),s=a,g=d["".concat(p,".").concat(s)]||d[s]||m[s]||l;return n?r.createElement(g,o(o({ref:e},c),{},{components:n})):r.createElement(g,o({ref:e},c))}));function g(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[d]="string"==typeof t?t:a,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var 
r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/decipher-json",id:"data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/decipher-json.md",tags:[],version:"current",frontMatter:{}},p=[],u={toc:p},c="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n }\n],\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"begin"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"end"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"# of observed deletions")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"total # of samples")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% annotation overlap")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0c0ee93d.196835cf.js b/assets/js/0c0ee93d.196835cf.js deleted file mode 100644 index 7a7d032f..00000000 --- a/assets/js/0c0ee93d.196835cf.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7268],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),s=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},u=function(e){var t=s(e.components);return r.createElement(i.Provider,{value:t},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,u=c(e,["components","mdxType","originalType","parentName"]),p=s(n),m=a,f=p["".concat(i,".").concat(m)]||p[m]||d[m]||o;return n?r.createElement(f,l(l({ref:t},u),{},{components:n})):r.createElement(f,l({ref:t},u))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=m;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[p]="string"==typeof e?e:a,l[1]=c;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var 
r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,c={unversionedId:"data-sources/dann-json",id:"version-3.21/data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/dann-json.md",tags:[],version:"3.21",frontMatter:{}},i=[],s={toc:i},u="wrapper";function p(e){let{components:t,...n}=e;return(0,a.kt)(u,(0,r.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 0.27\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1.0")))))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0cd662a6.0f06469f.js b/assets/js/0cd662a6.0f06469f.js deleted file mode 100644 index 33d9e7a3..00000000 --- a/assets/js/0cd662a6.0f06469f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2108],{3905:(e,n,t)=>{t.d(n,{Zo:()=>u,kt:()=>h});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var 
a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function r(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):r(r({},n),e)),t},u=function(e){var n=c(e.components);return a.createElement(s.Provider,{value:n},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),p=c(t),m=i,h=p["".concat(s,".").concat(m)]||p[m]||d[m]||o;return t?a.createElement(h,r(r({ref:n},u),{},{components:t})):a.createElement(h,r({ref:n},u))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var o=t.length,r=new Array(o);r[0]=m;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[p]="string"==typeof e?e:i,r[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>r,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=t(87462),i=(t(67294),t(3905));const o={title:"Annotating COVID-19"},r=void 0,l={unversionedId:"introduction/covid19",id:"version-3.18/introduction/covid19",title:"Annotating COVID-19",description:"The Nirvana development team is mainly focused on providing annotations for the human genome. 
This focus allows us to maximize our resources towards understanding human health.",source:"@site/versioned_docs/version-3.18/introduction/covid19.md",sourceDirName:"introduction",slug:"/introduction/covid19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/covid19",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/introduction/covid19.md",tags:[],version:"3.18",frontMatter:{title:"Annotating COVID-19"},sidebar:"docs",previous:{title:"Parsing Nirvana JSON",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/parsing-json"},next:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/1000Genomes"}},s=[{value:"Getting Nirvana",id:"getting-nirvana",children:[],level:2},{value:"Downloading the COVID-19 data files",id:"downloading-the-covid-19-data-files",children:[],level:2},{value:"Download a COVID-19 VCF file",id:"download-a-covid-19-vcf-file",children:[],level:2},{value:"Running Nirvana",id:"running-nirvana",children:[],level:2},{value:"Investigating the Results",id:"investigating-the-results",children:[],level:2}],c={toc:s},u="wrapper";function p(e){let{components:n,...t}=e;return(0,i.kt)(u,(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health."),(0,i.kt)("p",null,"However, nothing in our architecture prevents us from supporting other genomes. 
Earlier this year, we had an opportunity to put that statement to the test - we added support for annotating the ",(0,i.kt)("strong",{parentName:"p"},"SARS-CoV-2")," genome, the virus that causes the ",(0,i.kt)("strong",{parentName:"p"},"COVID-19")," disease."),(0,i.kt)("p",null,"In addition to normal transcript annotation, we also supply:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"allele frequencies"),(0,i.kt)("li",{parentName:"ul"},"protein domains")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"SARS-CoV-2 Galaxy Project")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The allele frequencies used by Nirvana were provided by the ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/galaxyproject/SARS-CoV-2"},"SARS-CoV-2 Galaxy Project"),". 
This is an international effort that provides ongoing analysis of COVID-19 using Galaxy, BioConda, and public research infrastructures."))),(0,i.kt)("h2",{id:"getting-nirvana"},"Getting Nirvana"),(0,i.kt)("p",null,"If you don't have Nirvana already, please consult our ",(0,i.kt)("a",{parentName:"p",href:"getting-started"},"Getting Started")," page first."),(0,i.kt)("h2",{id:"downloading-the-covid-19-data-files"},"Downloading the COVID-19 data files"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip"},"a data zip file")," containing new gene models, reference, and external data sources for SARS-CoV-2:"),(0,i.kt)("p",null,"Just go to the directory that contains your Nirvana ",(0,i.kt)("inlineCode",{parentName:"p"},"Data")," directory."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"cd ~/Nirvana\ncurl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip\nunzip Covid19Data.zip\n")),(0,i.kt)("h2",{id:"download-a-covid-19-vcf-file"},"Download a COVID-19 VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz"},"a COVID-19 VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz\n")),(0,i.kt)("h2",{id:"running-nirvana"},"Running Nirvana"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/Nirvana.dll \\\n -c Data/Cache/SARS-CoV-2/SARS-CoV-2 \\\n --sd Data/SupplementaryAnnotation/SARS-CoV-2 \\\n -r Data/References/SARS-CoV-2.ASM985889v3.dat \\\n -i Covid19Mutations.vcf.gz \\\n -o 
Covid19Mutations\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache prefix"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:00.0\nSA Position Scan 00:00:00.0 1763\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nNC_045512 00:00:00.0 00:00:00.1 173\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:00.0 2.0 %\nPreload 00:00:00.0 0.3 %\nAnnotation 00:00:00.1 6.0 %\n\nTime: 00:00:01.5\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"Covid19Mutations.json.gz"),". 
Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.json.gz"},"the full JSON file"),"."),(0,i.kt)("h2",{id:"investigating-the-results"},"Investigating the Results"),(0,i.kt)("p",null,"Here's an example of what a COVID-19 variant looks like in the JSON output:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "chromosome":"NC_045512.2",\n "position":27323,\n "refAllele":"C",\n "altAlleles":[\n "T"\n ],\n "filters":[\n "PASS"\n ],\n "proteinDomains":[\n {\n "start":27202,\n "end":27384,\n "proteinId":"YP_009724394.1",\n "domainId":"cl13556",\n "domainName":"Sars6 super family",\n "reciprocalOverlap":0.00546,\n "annotationOverlap":0.00546\n }\n ],\n "variants":[\n {\n "vid":"NC_045512.2-27323-C-T",\n "chromosome":"NC_045512.2",\n "begin":27323,\n "end":27323,\n "refAllele":"C",\n "altAllele":"T",\n "variantType":"SNV",\n "hgvsg":"NC_045512.2:g.27323C>T",\n "alleleFrequency":{\n "refAllele":"C",\n "altAllele":"T",\n "allAc":8,\n "allAn":1058,\n "allAf":0.007561\n },\n "transcripts":[\n {\n "transcript":"YP_009724394.1",\n "source":"RefSeq",\n "bioType":"protein_coding",\n "codons":"tCt/tTt",\n "aminoAcids":"S/F",\n "cdnaPos":"122",\n "cdsPos":"122",\n "exons":"1/1",\n "proteinPos":"41",\n "geneId":"43740572",\n "hgnc":"ORF6",\n "consequence":[\n "missense_variant"\n ],\n "hgvsc":"YP_009724394.1:c.122C>T",\n "hgvsp":"YP_009724394.1:p.(Ser41Phe)",\n "proteinId":"YP_009724394.1"\n },\n {\n "transcript":"YP_009724395.1",\n "source":"RefSeq",\n "bioType":"protein_coding",\n "geneId":"43740573",\n "hgnc":"ORF7a",\n "consequence":[\n "upstream_gene_variant"\n ],\n "proteinId":"YP_009724395.1"\n }\n ]\n }\n ]\n}\n')))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0d1682b8.b2a1e362.js b/assets/js/0d1682b8.b2a1e362.js deleted file mode 100644 index 85048398..00000000 --- a/assets/js/0d1682b8.b2a1e362.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6472],{3905:(t,e,a)=>{a.d(e,{Zo:()=>p,kt:()=>g});var n=a(67294);function l(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function r(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(l[a]=t[a]);return l}(t,e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(l[a]=t[a])}return l}var s=n.createContext({}),m=function(t){var e=n.useContext(s),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},p=function(t){var e=m(t.components);return n.createElement(s.Provider,{value:e},t.children)},d="mdxType",k={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,l=t.mdxType,r=t.originalType,s=t.parentName,p=o(t,["components","mdxType","originalType","parentName"]),d=m(a),N=l,g=d["".concat(s,".").concat(N)]||d[N]||k[N]||r;return a?n.createElement(g,i(i({ref:e},p),{},{components:a})):n.createElement(g,i({ref:e},p))}));function g(t,e){var a=arguments,l=e&&e.mdxType;if("string"==typeof t||l){var r=a.length,i=new Array(r);i[0]=N;var o={};for(var s in e)hasOwnProperty.call(e,s)&&(o[s]=e[s]);o.originalType=t,o[d]="string"==typeof t?t:l,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var n=a(87462),l=(a(67294),a(3905));const r={title:"Custom Annotations"},i=void 0,o={unversionedId:"file-formats/custom-annotations",id:"version-3.18/file-formats/custom-annotations",title:"Custom 
Annotations",description:"Overview",source:"@site/versioned_docs/version-3.18/file-formats/custom-annotations.md",sourceDirName:"file-formats",slug:"/file-formats/custom-annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/file-formats/custom-annotations",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/file-formats/custom-annotations.md",tags:[],version:"3.18",frontMatter:{title:"Custom Annotations"},sidebar:"docs",previous:{title:"Nirvana JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/file-formats/nirvana-json-file-format"},next:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/canonical-transcripts"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Variant File Format",id:"variant-file-format",children:[{value:"Basic Allele Frequency Example",id:"basic-allele-frequency-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv",children:[],level:4},{value:"Convert to Nirvana Format",id:"convert-to-nirvana-format",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results",children:[],level:4}],level:3},{value:"Categories & Descriptions Example",id:"categories--descriptions-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-1",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-1",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-1",children:[],level:4},{value:"Using Positional Matches",id:"using-positional-matches",children:[],level:4}],level:3},{value:"Genomic Region Example",id:"genomic-region-example",children:[{value:"Create the Custom Annotation 
TSV",id:"create-the-custom-annotation-tsv-2",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-2",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-2",children:[],level:4}],level:3},{value:"Genomic Regions for Structural Variants Example",id:"genomic-regions-for-structural-variants-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-3",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-3",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-3",children:[],level:4}],level:3},{value:"Mixing Small Variants and Genomic Regions",id:"mixing-small-variants-and-genomic-regions",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-4",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-4",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-4",children:[],level:4}],level:3}],level:2},{value:"Gene File Format",id:"gene-file-format",children:[{value:"Basic Gene Example",id:"basic-gene-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-5",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-5",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-5",children:[],level:4}],level:3}],level:2},{value:"Customizing the Header",id:"customizing-the-header",children:[{value:"Title",id:"title",children:[],level:3},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:3},{value:"Matching Criteria",id:"matching-criteria",children:[],level:3},{value:"Categories",id:"categories",children:[],level:3},{value:"Descriptions",id:"descriptions",children:[{value:"Populations",id:"populations",children:[],level:4}],level:3},{value:"Data Types",id:"data-types",children:[],level:3}],level:2},{value:"Using 
SAUtils",id:"using-sautils",children:[{value:"Convert Variant File",id:"convert-variant-file",children:[],level:3},{value:"Convert Gene File",id:"convert-gene-file",children:[],level:3}],level:2}],m={toc:s},p="wrapper";function d(t){let{components:e,...a}=t;return(0,l.kt)(p,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"While the team tries to keep data sources up-to-date, you might want to start incorporate new annotations ahead of our update cycle. Another\ncommon use case involves protected health information (PHI). Custom annotations are a mechanism that enables both use cases."),(0,l.kt)("p",null,"Here are some examples of how our collaborators use custom annotations:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"associating context from both a patient-level and a patient cohort level with the variant annotations"),(0,l.kt)("li",{parentName:"ul"},"adding content that is licensed (e.g. HGMD) to the variant annotations")),(0,l.kt)("p",null,"At the moment, we have two different custom annotation file formats. One provides additional annotations to variants (both small variants and SVs)\nwhile the other caters to gene annotations."),(0,l.kt)("p",null,"In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data."),(0,l.kt)("p",null,"The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how\nNirvana should match the variants."),(0,l.kt)("p",null,"At Illumina, there are usually many components downstream of Nirvana that have to parse our annotations. If a customer provides a custom\nannotation, those downstream tools need to understand more about the data such as:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"data type (e.g. number, boolean, or a string)"),(0,l.kt)("li",{parentName:"ul"},"data category (e.g. 
is this an allele count, allele number, allele frequency, etc.)"),(0,l.kt)("li",{parentName:"ul"},"associated population (i.e. if this is an allele frequency)")),(0,l.kt)("p",null,"For each custom annotation, Nirvana uses this context to create a ",(0,l.kt)("a",{parentName:"p",href:"https://json-schema.org/"},"JSON schema")," that can be sent to downstream tools. If\na tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of ","[0, 1]","."),(0,l.kt)("h2",{id:"variant-file-format"},"Variant File Format"),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"File Format")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Nirvana expects plain text (or gzipped text) files. Using tools like Excel can add extra characters that can break parsing. We highly recommend creating and modifying these files with plain text editor like Notepad, Notepad++ or Atom."))),(0,l.kt)("h3",{id:"basic-allele-frequency-example"},"Basic Allele Frequency Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Imagine that you want to create a basic allele frequency custom annotation for small variants. 
If we visualized the tab-delimited file\n(TSV), it would look something like this:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency")),(0,l.kt)("tr",{parentName:"tbody"}
,(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over the header and discuss the contents:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"title")," indicates the name of the JSON key"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"assembly")," indicates that this data is only valid for 
",(0,l.kt)("inlineCode",{parentName:"li"},"GRCh38"),"."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"matchVariantsBy")," indicates how annotations should be matched and reported. In this case annotations will be matched and reported by allele."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"categories")," provides hints to downstream tools on how they might want to treat the data. In this case, we indicate that it's an allele frequency."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"descriptions")," are used in special circumstances to provide more context. Even though column 5 is called ",(0,l.kt)("inlineCode",{parentName:"li"},"allAf"),", it might not be clear to a\ndownstream tool that this means a global allele frequency using all sub-populations. In this case, ",(0,l.kt)("inlineCode",{parentName:"li"},"ALL")," indicates the intended population."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"type")," indicates to downstream tools the data type. 
Since allele frequencies are numbers, we'll write ",(0,l.kt)("inlineCode",{parentName:"li"},"number")," in this column.")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Reference Base Checking")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Nirvana validates all the reference bases in a custom annotation. If a variant or genomic region is specified that has the wrong reference base, an error will be produced."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"The variants within each chromosome must be sorted by genomic position."))),(0,l.kt)("h4",{id:"convert-to-nirvana-format"},"Convert to Nirvana 
Format"),(0,l.kt)("p",null,"First we need to convert the TSV file to Nirvana's native file format and let's put that file in a new directory called CA:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"$ mkdir CA\n$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA\n---------------------------------------------------------------------------\nSAUtils (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nChromosome 16 completed in 00:00:00.1\nChromosome 19 completed in 00:00:00.0\n\nTime: 00:00:00.2\n")),(0,l.kt)("h4",{id:"annotate-with-nirvana"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's annotate the following VCF (notice that it's one of the variants that we have in our custom annotation):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 68801894 . G A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,"Since Nirvana can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to\nthe normal Nirvana command-line."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash",metastring:"{3}","{3}":!0},"$ dotnet bin/Release/netcoreapp2.1/Nirvana.dll -c Data/Cache/GRCh38/Both \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \\\n --sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA\n---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.8\nSA Position Scan 00:00:00.0 19\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr16 00:00:00.2 00:00:01.3 1\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:01.9 25.5 %\nPreload 00:00:00.2 3.3 %\nAnnotation 00:00:01.3 18.2 %\n\nTime: 00:00:06.3\n")),(0,l.kt)("h4",{id:"investigate-the-results"},"Investigate the Results"),(0,l.kt)("p",null,"We would expect the following data to show up in our JSON output file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-16}","{12-16}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n 
"altAllele": "A",\n "allAf": 7e-06\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"Nirvana preserves up to 6 decimal places for allele frequency data."),(0,l.kt)("h3",{id:"categories--descriptions-example"},"Categories & Descriptions Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-1"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Building on the previous example, we can add other types of annotations like predictions and general notes."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 6"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 
7"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,l.kt)("td",{parentName:"tr",align:"left"},"pathogenicity"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr"
,align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579"),(0,l.kt)("td",{parentName:"tr",align:"left"},"P"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569"),(0,l.kt)("td",{parentName:"tr",align:"left"},"LP"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in case 
123")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource2.tsv"},"the full TSV file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Placeholders")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). 
While\nNirvana also accepts empty columns in the TSV file, we use them in these examples to promote readability."))),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 6")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"pathogenicity")," which uses the ",(0,l.kt)("inlineCode",{parentName:"li"},"Prediction")," category. When using this category, Nirvana will\nvalidate to make\nsure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic)."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 7")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes")," and it doesn't have a category or description. We're just going to use it to add some internal\nnotes.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-1"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the\nalternate allele (allele-specific match):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 23603511 . TG T . . .\n16 68801894 . G A . . .\n19 11107436 . G C . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA2.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-1"},"Investigate the Results"),(0,l.kt)("p",null,"Because we specified ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," in our custom annotation file, only the middle variant will get an annotation:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-18}","{12-18}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123"\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA2.json.gz"},"the full JSON file"),"."),(0,l.kt)("h4",{id:"using-positional-matches"},"Using Positional Matches"),(0,l.kt)("p",null,"What would happen if we changed to ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position"),"? Two things will happen. 
First, our positional variants will now match:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-17}","{12-17}":!0},' "variants": [\n {\n "vid": "16-23603511-TG-T",\n "chromosome": "16",\n "begin": 23603512,\n "end": 23603512,\n "refAllele": "G",\n "altAllele": "-",\n "variantType": "deletion",\n "hgvsg": "NC_000016.10:g.23603512delG",\n "MyDataSource": [\n {\n "refAllele": "GA",\n "altAllele": "-",\n "allAf": 7e-06,\n "pathogenicity": "P"\n }\n ],\n "clinvar": [\n')),(0,l.kt)("p",null,"In addition, you will now see an extra flag for our allele-specific variant:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-20}","{12-20}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": [\n {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123",\n "isAlleleSpecific": true\n }\n ],\n "clinvar": [\n')),(0,l.kt)("h3",{id:"genomic-region-example"},"Genomic Region Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-2"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. 
Consider the following example:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0
,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"20000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"70000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Lots of false positives in this region")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource3.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes"),". In essence, it looks exactly like column 7 from our previous example."),(0,l.kt)("li",{parentName:"ul"},"The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.")),(0,l.kt)("p",null,"In the previous example we learned about positional matching vs allele-specific matching. 
For genomic regions, ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position")," produce\nthe same result."),(0,l.kt)("h4",{id:"annotate-with-nirvana-2"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use the same VCF file as our previous example."),(0,l.kt)("h4",{id:"investigate-the-results-2"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 23603511,\n "refAllele": "TG",\n "altAlleles": [\n "T"\n ],\n "cytogeneticBand": "16p12.2",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA3.json.gz"},"the full JSON file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Reciprocal & Annotation 
Overlap")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For all intervals, Nirvana internally calculates two overlaps: a ",(0,l.kt)("strong",{parentName:"p"},"variant overlap")," and an ",(0,l.kt)("strong",{parentName:"p"},"annotation overlap"),". Variant overlap is the percentage of the variant's length that is\noverlapped. Annotation overlap is the percentage of the annotation's length that is overlapped."),(0,l.kt)("p",{parentName:"div"},(0,l.kt)("strong",{parentName:"p"},"Reciprocal overlap")," is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is 1 bp, both overlaps will be pretty close to 0."))),(0,l.kt)("p",null,"We will also see this annotation for the other variant on chr16:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 68801894,\n "refAllele": "G",\n "altAlleles": [\n "A"\n ],\n "cytogeneticBand": "16q22.1",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("h3",{id:"genomic-regions-for-structural-variants-example"},"Genomic Regions for Structural Variants Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-3"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To force Nirvana to match regions only to other SVs, use the ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=sv")," option in the header. 
Here is an example:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=sv"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{par
entName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"20000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"70000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Lots of false positives in this region")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource6.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"The main difference is the header field ",(0,l.kt)("inlineCode",{parentName:"li"},"#matchVariantsBy=sv")," which indicates that only structural variants that overlap these genomic regions will receive annotations.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-3"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file. It contains the first variant from the previous file and a structural variant deletion- both of which overlap the given genomic region."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 23603511 . TG T . . .\n16 68801894 . G . . 
END=73683789;SVTYPE=DEL\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA6.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-3"},"Investigate the Results"),(0,l.kt)("p",null,"Note that this time, ",(0,l.kt)("inlineCode",{parentName:"p"},"MyDataSource")," only showed up for the ",(0,l.kt)("inlineCode",{parentName:"p"},"<DEL>")," and not the deletion ",(0,l.kt)("inlineCode",{parentName:"p"},"16-23603511-TG-T"),"."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{21-29}","{21-29}":!0},' {\n "chromosome": "16",\n "position": 23603511,\n "refAllele": "TG",\n "altAlleles": [\n "T"\n ],\n "cytogeneticBand": "16p12.2",\n "variants": [\n ...\n ...\n {\n "chromosome": "16",\n "position": 68801894,\n "svEnd": 73683789,\n "refAllele": "G",\n "altAlleles": [\n "<DEL>"\n ],\n "cytogeneticBand": "16q22.1-q22.3",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0.02396,\n "annotationOverlap": 0.02396\n }\n ],\n "variants": [\n\n')),(0,l.kt)("h3",{id:"mixing-small-variants-and-genomic-regions"},"Mixing Small Variants and Genomic Regions"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-4"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions. 
Let's create a file that contains both:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 6"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."
),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:
"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval #1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"<","DEL",">"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval #2")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr22"),(0,l.kt)("td",{parentName:"tr",align:"left"},"12370388"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T[chr22:12370729["),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"Known false-positive")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource4.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 4")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"ALT")," field. Except for the case listed below, this is only used by small variants or translocation breakends."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"END")," field. This is only used by genomic regions."),(0,l.kt)("li",{parentName:"ul"},"There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? Interval #2 has ",(0,l.kt)("strong",{parentName:"li"},"a symbolic allele in the ALT column"),". 
When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). When Nirvana matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-4"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file to study how matching works for intervals #1 and #2:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n21 10510818 . C . . END=10699435;SVTYPE=DUP\n22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA3.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,'The first variant is similar to the custom annotation labelled "interval #2". Position 10510818 is the padding base, so it effectively starts at position 10510819.'),(0,l.kt)("h4",{id:"investigate-the-results-4"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-26}","{11-26}":!0},' "positions": [\n {\n "chromosome": "21",\n "position": 10510818,\n "svEnd": 10699435,\n "refAllele": "C",\n "altAlleles": [\n ""\n ],\n "cytogeneticBand": "21p11.2",\n "MyDataSource": [\n {\n "start": 10510818,\n "end": 10699435,\n "notes": "Interval #1",\n "reciprocalOverlap": 0.99999,\n "annotationOverlap": 0.99999\n },\n {\n "start": 10510819,\n "end": 10699435,\n "notes": "Interval #2",\n "reciprocalOverlap": 1,\n "annotationOverlap": 1\n }\n ],\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA4.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. 
Interval #1 technically starts 1 bp earlier, so its overlap is 99.999%."),(0,l.kt)("p",null,"Further down the JSON file, we find the annotated translocation breakend:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-15}","{11-15}":!0},' "variants": [\n {\n "vid": "22-12370388-T-T[chr22:12370729[",\n "chromosome": "22",\n "begin": 12370388,\n "end": 12370388,\n "isStructuralVariant": true,\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "variantType": "translocation_breakend",\n "MyDataSource": {\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "notes": "Known false-positive"\n }\n }\n')),(0,l.kt)("h2",{id:"gene-file-format"},"Gene File Format"),(0,l.kt)("h3",{id:"basic-gene-example"},"Basic Gene Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-5"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions, however, sometimes we would like to add custom gene annotations. 
The gene custom annotation file format\nlooks slightly different:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#geneSymbol"),(0,l.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,l.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TP53"),(0,l.kt)("td",{parentName:"tr",align:"left"},"7157"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colorectal cancer, hereditary nonpolyposis, type 
5"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KRAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ENSG00000133703"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mismatch repair cancer syndrome"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in cohort 123")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource5.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 2")," has the ",(0,l.kt)("inlineCode",{parentName:"li"},"geneId")," field. This can be either an ",(0,l.kt)("strong",{parentName:"li"},"Entrez Gene ID")," or an ",(0,l.kt)("strong",{parentName:"li"},"Ensembl ID"),".")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Gene Symbols")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Nirvana uses the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneId")," to match genes rather than the gene symbol. 
However, to\nmake the custom annotation files easier to read, we've included the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneSymbol")," column as well."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unknown Gene IDs")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"When Nirvana parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Nirvana. In such a case, Nirvana will display an error showing all the\nunrecognized gene IDs."))),(0,l.kt)("h4",{id:"annotate-with-nirvana-5"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a VCF file that contains variants in TP53 and KRAS:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n12 25227255 . A T . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA4.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-5"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{24-27}","{24-27}":!0},' "genes": [\n {\n "name": "KRAS",\n "clingenGeneValidity": [\n {\n "diseaseId": "MONDO_0009026",\n "disease": "Costello syndrome",\n "classification": "disputed",\n "classificationDate": "2018-07-24"\n }\n ],\n "clingenDosageSensitivityMap": {\n "haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"\n },\n "gnomAD": {\n "pLi": 0.000788,\n "pRec": 0.789,\n "pNull": 0.21,\n "synZ": 0.336,\n "misZ": 2.32,\n "loeuf": 1.24\n },\n "MyDataSource": {\n "phenotype": "Mismatch repair cancer syndrome",\n "notes": "Seen in cohort 123"\n }\n },\n')),(0,l.kt)("p",null,"This is the abbreviated output for KRAS. Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA5.json.gz"},"the full JSON file")," if you want to see the complete KRAS entry."),(0,l.kt)("h2",{id:"customizing-the-header"},"Customizing the Header"),(0,l.kt)("h3",{id:"title"},"Title"),(0,l.kt)("p",null,"For the title, you can provide any string that hasn't already been used. 
The title should be unique."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Make sure that the title does not conflict with other keys in the JSON file."))),(0,l.kt)("p",null,"For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"vid"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"transcripts"),", etc.. The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clinvar")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"gnomad"),"."),(0,l.kt)("p",null,"For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"svLength"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"cytogeneticBand"),", etc. 
The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clingen")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"dgv"),"."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Care should be taken not to annotate using multiple custom annotations that all use the same title."))),(0,l.kt)("h3",{id:"genome-assemblies"},"Genome Assemblies"),(0,l.kt)("p",null,"The following genome assemblies can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"GRCh37"),(0,l.kt)("li",{parentName:"ul"},"GRCh38")),(0,l.kt)("h3",{id:"matching-criteria"},"Matching Criteria"),(0,l.kt)("p",null,"The matching criteria instructs how Nirvana should match a VCF variant to the custom annotation."),(0,l.kt)("p",null,"The following matching criteria can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"allele")," - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"gnomAD")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"position")," - use this when you want positional matches. 
This is commonly used with disease phenotype data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"ClinVar")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"sv")," - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline\ncopy number intervals along the genome.")),(0,l.kt)("h3",{id:"categories"},"Categories"),(0,l.kt)("p",null,"Categories are not used by Nirvana, but are often used by downstream tools. Categories provide hints for how those tools should filter or display\nthe annotation data."),(0,l.kt)("p",null,"When a category is specified, Nirvana will provide additional validation for those fields. The following table describes each category:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Category"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Validation"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele counts for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleNumber"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele numbers for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele frequencies for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations 
below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ACMG-style pathogenicity classifications"),(0,l.kt)("td",{parentName:"tr",align:"left"},"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"benign")," (B)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely benign")," (LB)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"VUS"),(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely pathogenic")," (LP)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"pathogenic")," (P)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free text that signals downstream tools to add the column to the filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 20 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free-text description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 100 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Identifier"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any ID"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 50 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"HomozygousCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"count of homozygous individuals for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Score"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any score value"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Any double-precision floating point 
number")))),(0,l.kt)("h3",{id:"descriptions"},"Descriptions"),(0,l.kt)("p",null,"Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations."),(0,l.kt)("h4",{id:"populations"},"Populations"),(0,l.kt)("p",null,"The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Super-population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ACB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African Caribbeans in Barbados")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"All populations")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ad Mixed American")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASJ"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ashkenazi Jewish")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASW"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Americans of 
African Ancestry in SW USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"BEB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Bengali from Bangladesh")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CDX"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Chinese Dai in Xishuangbanna, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CEU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Utah Residents (CEPH) with Northern and Western European Ancestry")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Han Chinese in Beijing, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Southern Han Chinese")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CLM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colombians from Medellin, Colombia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"East Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ESN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Esan in 
Nigeria")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"FIN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Finnish in Finland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GBR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"British in England and Scotland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GIH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gujarati Indian from Houston, Texas")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GWD"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gambian in Western Divisions in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"IBS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Iberian population in Spain")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ITU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Indian Telugu from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"JPT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Japanese in Tokyo, Japan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KHV"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Kinh in Ho Chi Minh City, 
Vietnam")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"LWK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Luhya in Webuye, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MAG"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mandinka in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MKK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Maasai in Kinyawa, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MSL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mende in Sierra Leone")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MXL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mexican Ancestry from Los Angeles, USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"NFE"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European (Non-Finnish)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Other")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PEL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Peruvians from Lima, Peru")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PJL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Punjabi from Lahore, 
Pakistan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Puerto Ricans from Puerto Rico")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"South Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"STU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Sri Lankan Tamil from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TSI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Toscani in Italia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"YRI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Yoruba in Ibadan, Nigeria")))),(0,l.kt)("h3",{id:"data-types"},"Data Types"),(0,l.kt)("p",null,"Each custom annotation can be one of the following data types:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"bool")," - true or false"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"number")," - any integer or floating-point number"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"string")," - text")),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 
1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For boolean variables, only keys with a ",(0,l.kt)("inlineCode",{parentName:"p"},"true")," value will be output to the JSON object."))),(0,l.kt)("h2",{id:"using-sautils"},"Using SAUtils"),(0,l.kt)("p",null,"Nirvana includes a tool called ",(0,l.kt)("inlineCode",{parentName:"p"},"SAUtils")," that converts various data sources into Nirvana's native binary format. The sub-commands ",(0,l.kt)("inlineCode",{parentName:"p"},"customvar")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"customgene")," are used to specify a variant file or a gene file respectively."),(0,l.kt)("h3",{id:"convert-variant-file"},"Convert Variant File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,l.kt)("h3",{id:"convert-gene-file"},"Convert Gene File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll 
customgene \\\n --uga Nirvana_UGA.tsv \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"--uga")," argument specifies the Nirvana universal gene archive (UGA) path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Nirvana_UGA file")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"The Nirvana_UGA is not part of the official set of files retrieved using the Downloader utility. 
But it is available ",(0,l.kt)("a",{parentName:"p",href:"http://annotations.nirvana.illumina.com/ab0cf104f39708eabd07b8cb67e149ba-Cache/27/UGA.tsv.gz"},"here"),"."))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/0ffe09fc.319238df.js b/assets/js/0ffe09fc.319238df.js deleted file mode 100644 index 3a212a86..00000000 --- a/assets/js/0ffe09fc.319238df.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[852,9076],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>h});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},d=function(e){var t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),c=m(n),u=i,h=c["".concat(s,".").concat(u)]||c[u]||p[u]||r;return n?a.createElement(h,o(o({ref:t},d),{},{components:n})):a.createElement(h,o({ref:t},d))}));function h(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof 
e?e:i,o[1]=l;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>c,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},o=void 0,l={unversionedId:"data-sources/cosmic-json",id:"version-3.18/data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/cosmic-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],m={toc:s},d="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},' "cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary gland (submandibular)",\n "numSamples":1\n },\n {\n "name":"salivary gland (parotid)",\n "numSamples":1\n },\n {\n "name":"salivary gland (nasal cavity)",\n "numSamples":1\n },\n {\n "name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 19841262\n ]\n }\n 
]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Count")),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"name"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"description")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})))))}c.isMDXComponent=!0},40660:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>o,metadata:()=>s,toc:()=>m});var a=n(87462),i=(n(67294),n(3905)),r=n(26203);const o={title:"COSMIC"},l=void 0,s={unversionedId:"data-sources/cosmic",id:"version-3.18/data-sources/cosmic",title:"COSMIC",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/cosmic.mdx",sourceDirName:"data-sources",slug:"/data-sources/cosmic",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/cosmic",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/cosmic.mdx",tags:[],version:"3.18",frontMatter:{title:"COSMIC"},sidebar:"docs",previous:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clinvar"},next:{title:"DANN",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dann"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Gene Fusions",id:"gene-fusions",children:[{value:"TSV 
File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4},{value:"Aggregation",id:"aggregation",children:[],level:4},{value:"Fixing the HGVS RNA Notation",id:"fixing-the-hgvs-rna-notation",children:[],level:4},{value:"Aggregating Histologies",id:"aggregating-histologies",children:[],level:4},{value:"Aggregating Sites",id:"aggregating-sites",children:[],level:4}],level:3},{value:"Known Issues",id:"known-issues",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2}],d={toc:m},c="wrapper";function p(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"COSMIC, the Catalogue of Somatic Mutations in Cancer, is the world's largest source of expert manually curated somatic mutation information relating to human cancers."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"John G Tate, Sally Bamford, Harry C Jubb, Zbyslaw Sondka, David M Beare, Nidhi Bindal, Harry Boutselakis, Charlotte G Cole, Celestino Creatore, Elisabeth Dawson, Peter Fish, Bhavana Harsha, Charlie Hathaway, Steve C Jupe, Chai Yin Kok, Kate Noble, Laura Ponting, Christopher C Ramshaw, Claire E Rye, 
Helen E Speedy, Ray Stefancsik, Sam L Thompson, Shicai Wang, Sari Ward, Peter J Campbell, Simon A Forbes. (2019) ",(0,i.kt)("a",{parentName:"p",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"COSMIC: the Catalogue Of Somatic Mutations In Cancer"),", ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", Volume 47, Issue D1"))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Licensed Content")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Commercial companies are required to ",(0,i.kt)("a",{parentName:"p",href:"https://cancer.sanger.ac.uk/cosmic/license"},"acquire a license from COSMIC"),". At the moment, this means that our COSMIC content is only available in Illumina's products and services, not in the open source distribution."),(0,i.kt)("p",{parentName:"div"},"Since many of you are academic users, we will enable a COSMIC login in our downloader later this year that will allow academic and commercial organizations (with a license) access our COSMIC data sources. "))),(0,i.kt)("h2",{id:"gene-fusions"},"Gene Fusions"),(0,i.kt)("p",null,"Gene fusions are manually curated from peer reviewed publications by expert COSMIC curators. A comprehensive literature curation is completed for each fusion pair when it is released in the database. 
Currently COSMIC includes information on fusions involved in solid tumours and leukaemias."),(0,i.kt)("h3",{id:"tsv-file"},"TSV File"),(0,i.kt)("h4",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"SAMPLE_ID SAMPLE_NAME PRIMARY_SITE SITE_SUBTYPE_1 SITE_SUBTYPE_2 SITE_SUBTYPE_3 PRIMARY_HISTOLOGY HISTOLOGY_SUBTYPE_1 HISTOLOGY_SUBTYPE_2 HISTOLOGY_SUBTYPE_3 FUSION_ID TRANSLOCATION_NAME 5'_CHROMOSOME 5'_STRAND 5'_GENE_ID 5'_GENE_NAME 5'_LAST_OBSERVED_EXON 5'_GENOME_START_FROM 5'_GENOME_START_TO 5'_GENOME_STOP_FROM 5'_GENOME_STOP_TO 3'_CHROMOSOME 3'_STRAND 3'_GENE_ID 3'_GENE_NAME 3'_FIRST_OBSERVED_EXON 3'_GENOME_START_FROM 3'_GENOME_START_TO 3'_GENOME_STOP_FROM 3'_GENOME_STOP_TO FUSION_TYPE PUBMED_PMID\n749711 HCC1187 breast NS NS NS carcinoma ductal_carcinoma NS NS 665 ENST00000360863.10(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452 8 - 197199 RGS22 22 99981937 99981937 100106116 100106116 1 + 212470 SYCP1_ENST00000369518 24 114944339 114944339 114995367 114995367 Inferred Breakpoint 20033038\n")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the TSV file, we're mainly interested in the following columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"SAMPLE_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_SITE")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_HISTOLOGY")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"HISTOLOGY_SUBTYPE_1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"FUSION_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"TRANSLOCATION_NAME")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PUBMED_PMID"))),(0,i.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"For all the histologies and sites, we replace all the underlines with spaces. ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary_gland")," would become ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary gland"),"."))),(0,i.kt)("h4",{id:"aggregation"},"Aggregation"),(0,i.kt)("p",null,"To create the gene fusion entries in Nirvana, we perform the following on each row in the TSV file:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Group all entries by FUSION_ID"),(0,i.kt)("li",{parentName:"ul"},"Using all the entries related to this FUSION_ID:",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"Collect all the PubMed IDs"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of observed sample IDs"),(0,i.kt)("li",{parentName:"ul"},"Grab the HGVS r. 
notation (should not change throughout the FUSION_ID)"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each histology"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each site"))),(0,i.kt)("li",{parentName:"ul"},"Extract the transcript IDs from the HGVS notation and lookup the associated gene symbols")),(0,i.kt)("h4",{id:"fixing-the-hgvs-rna-notation"},"Fixing the HGVS RNA Notation"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"ENST00000360863.6(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452\n")),(0,i.kt)("p",null,"There are some issues with the HGVS RNA notation:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"The two transcripts should be linked by a double colon ",(0,i.kt)("inlineCode",{parentName:"li"},"::"),"."),(0,i.kt)("li",{parentName:"ul"},"For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusion"),(0,i.kt)("li",{parentName:"ul"},"If only the breakpoint is truly known, the recommendation is to use ",(0,i.kt)("inlineCode",{parentName:"li"},"?")," marks")),(0,i.kt)("p",null,"We chose to only update the linkage between each transcript using double colons ",(0,i.kt)("inlineCode",{parentName:"p"},"::"),". While we could have recalculated the HGVS notation using the supplied breakpoints, we chose not to because the resulting notation would be quite different from the original material. This would potentially lead to some confusion."),(0,i.kt)("h4",{id:"aggregating-histologies"},"Aggregating Histologies"),(0,i.kt)("p",null,"For histologies we want to capture the most specific description available. In the example above, we saw that the primary histology was ",(0,i.kt)("inlineCode",{parentName:"p"},"carcinoma"),", but the subtype was ",(0,i.kt)("inlineCode",{parentName:"p"},"ductal carcinoma"),". 
In this case we would use the subtype for the annotation."),(0,i.kt)("p",null,"COSMIC uses ",(0,i.kt)("inlineCode",{parentName:"p"},"NS")," to show that a value is empty. If the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"NS"),", we will use the primary histology instead."),(0,i.kt)("h4",{id:"aggregating-sites"},"Aggregating Sites"),(0,i.kt)("p",null,"For sites, we observe that the subtype provides additional description but is still dependent on the primary site value. For example, the primary site might be ",(0,i.kt)("inlineCode",{parentName:"p"},"skin"),", but the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"foot"),". Therefore, we will combine the values in the following manner: ",(0,i.kt)("inlineCode",{parentName:"p"},"skin (foot)"),"."),(0,i.kt)("h3",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"There are some issues with the HGVS RNA notation:"),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"The two transcripts should be linked by a double colon ",(0,i.kt)("inlineCode",{parentName:"li"},"::"),". We fixed this aspect in Nirvana."),(0,i.kt)("li",{parentName:"ul"},"For coding transcripts, HGVS numbering should use CDS coordinates. 
Right now COSMIC is using cDNA coordinates for all their fusions.")))),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v94/CosmicFusionExport.tsv.gz"},"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v94/CosmicFusionExport.tsv.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v94/CosmicFusionExport.tsv.gz"},"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v94/CosmicFusionExport.tsv.gz"))),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/1072b09c.4c2720ad.js b/assets/js/1072b09c.4c2720ad.js deleted file mode 100644 index c6f70827..00000000 --- a/assets/js/1072b09c.4c2720ad.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3389],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function i(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var s=r.createContext({}),p=function(t){var e=r.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=p(t.components);return r.createElement(s.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var 
e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,o=t.originalType,s=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),c=p(n),u=a,g=c["".concat(s,".").concat(u)]||c[u]||d[u]||o;return n?r.createElement(g,i(i({ref:e},m),{},{components:n})):r.createElement(g,i({ref:e},m))}));function g(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=n.length,i=new Array(o);i[0]=u;var l={};for(var s in e)hasOwnProperty.call(e,s)&&(l[s]=e[s]);l.originalType=t,l[c]="string"==typeof t?t:a,i[1]=l;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/cosmic-json",id:"version-3.16/data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/cosmic-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],p={toc:s},m="wrapper";function c(t){let{components:e,...n}=t;return(0,a.kt)(m,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' "cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary gland (submandibular)",\n "numSamples":1\n },\n {\n "name":"salivary gland (parotid)",\n "numSamples":1\n },\n {\n "name":"salivary gland (nasal cavity)",\n "numSamples":1\n 
},\n {\n "name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 19841262\n ]\n }\n ]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"id"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion ID")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,a.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,a.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")))),(0,a.kt)("p",null,(0,a.kt)("strong",{parentName:"p"},"Count")),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"name"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"description")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"})))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/10fd7dc0.fa7b7214.js b/assets/js/10fd7dc0.fa7b7214.js deleted file mode 100644 index b6daf783..00000000 --- a/assets/js/10fd7dc0.fa7b7214.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1987],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>h});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var n=c(e.components);return 
a.createElement(s.Provider,{value:n},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=c(t),m=i,h=d["".concat(s,".").concat(m)]||d[m]||u[m]||r;return t?a.createElement(h,o(o({ref:n},p),{},{components:t})):a.createElement(h,o({ref:n},p))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,o=new Array(r);o[0]=m;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[d]="string"==typeof e?e:i,o[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=t(87462),i=(t(67294),t(3905));const r={title:"Gene Fusion Detection"},o=void 0,l={unversionedId:"core-functionality/gene-fusions",id:"version-3.2.5/core-functionality/gene-fusions",title:"Gene Fusion Detection",description:"Overview",source:"@site/versioned_docs/version-3.2.5/core-functionality/gene-fusions.md",sourceDirName:"core-functionality",slug:"/core-functionality/gene-fusions",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/core-functionality/gene-fusions",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/core-functionality/gene-fusions.md",tags:[],version:"3.2.5",frontMatter:{title:"Gene Fusion Detection"},sidebar:"version-3.2.5/docs",previous:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/core-functionality/variant-ids"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Approach",id:"approach",children:[{value:"Variant Types",id:"variant-types",children:[],level:3},{value:"Criteria",id:"criteria",children:[],level:3}],level:2},{value:"ETV6/RUNX1 
Example",id:"etv6runx1-example",children:[{value:"VCF",id:"vcf",children:[{value:"Interpreting translocation breakends",id:"interpreting-translocation-breakends",children:[],level:4},{value:"Visualization",id:"visualization",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output",children:[{value:"Consequences",id:"consequences",children:[],level:4},{value:"Introns & Exons",id:"introns--exons",children:[],level:4},{value:"HGVS coding notation",id:"hgvs-coding-notation",children:[],level:4}],level:3}],level:2}],c={toc:s},p="wrapper";function d(e){let{components:n,...r}=e;return(0,i.kt)(p,(0,a.Z)({},c,r,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed."),(0,i.kt)("p",null,"Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana."),(0,i.kt)("p",null,"The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(32596).Z})),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 
7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. ",(0,i.kt)("a",{parentName:"p",href:"https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-015-0252-1"},"Landscape of gene fusions in epithelial cancers: seq and ye shall find"),". Genome Med 7, 129 (2015)"))),(0,i.kt)("h2",{id:"approach"},"Approach"),(0,i.kt)("p",null,"Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. "),(0,i.kt)("p",null,"For each originating transcript, we report the following:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"originating intron or exon number"),(0,i.kt)("li",{parentName:"ul"},"for each partner transcript fused with the originating transcript, we report:",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"HGVS coding notation"),(0,i.kt)("li",{parentName:"ul"},"partner intron or exon number")))),(0,i.kt)("h3",{id:"variant-types"},"Variant Types"),(0,i.kt)("p",null,"Specifically we can identify gene fusions from the following structural variant types:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"deletions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"tandem_duplications (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"inversions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"translocation breakpoints (",(0,i.kt)("inlineCode",{parentName:"li"},"AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911["),") ")),(0,i.kt)("h3",{id:"criteria"},"Criteria"),(0,i.kt)("p",null,"The following criteria must be met for Nirvana to identify a gene fusion:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"Both transcripts must possess a coding region"),(0,i.kt)("li",{parentName:"ol"},"After accounting for 
genomic rearrangements, both transcripts must have the same orientation"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must belong to different genes"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts cannot have a coding region that already overlaps without the variant (i.e. in cases where two genes naturally overlap, we don't want to call a gene fusion)"),(0,i.kt)("li",{parentName:"ol"},"The coding regions from the two genes must overlap\n:::")),(0,i.kt)("h2",{id:"etv6runx1-example"},"ETV6/RUNX1 Example"),(0,i.kt)("p",null,"ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). Patients with this translocation are associated with a good prognosis and excellent response to treatment."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sun C., Chang L., Zhu X. ",(0,i.kt)("a",{parentName:"p",href:"https://www.oncotarget.com/article/16367/text/"},"Pathogenesis of ETV6/RUNX1-positive childhood acute lymphoblastic leukemia and mechanisms underlying its relapse"),". Oncotarget. 
2017; 8: 35445-35459"))),(0,i.kt)("h3",{id:"vcf"},"VCF"),(0,i.kt)("p",null,"Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\nchr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND\nchr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND\nchr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND\nchr21 36420865 . C [chr12:12026270[C . PASS SVTYPE=BND\n")),(0,i.kt)("h4",{id:"interpreting-translocation-breakends"},"Interpreting translocation breakends"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"REF"),(0,i.kt)("th",{parentName:"tr",align:"left"},"ALT"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Meaning"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t[p["),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the right of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t]p]"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending left of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"]p]t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the left of p is joined before t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"[p[t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending right of p is joined before 
t")))),(0,i.kt)("h4",{id:"visualization"},"Visualization"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(28267).Z})),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)("p",null,"The annotation for the first variant in the VCF looks like this:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{66,68-100,113,115-123}","{66,68-100,113,115-123}":!0},' {\n "chromosome": "chr12",\n "position": 12026270,\n "refAllele": "C",\n "altAlleles": [\n "[chr21:36420865[C"\n ],\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "12p13.2",\n "clingen": [\n {\n "chromosome": "12",\n "begin": 173786,\n "end": 34835837,\n "variantType": "copy_number_gain",\n "id": "nsv995956",\n "clinicalInterpretation": "pathogenic",\n "phenotypes": [\n "Decreased calvarial ossification",\n "Delayed gross motor development",\n "Feeding difficulties",\n "Frontal bossing",\n "Morphological abnormality of the central nervous system",\n "Patchy alopecia"\n ],\n "phenotypeIds": [\n "HP:0002007",\n "HP:0002011",\n "HP:0002194",\n "HP:0002232",\n "HP:0005474",\n "HP:0011968",\n "MedGen:C0232466",\n "MedGen:C1862862",\n "MedGen:CN001816",\n "MedGen:CN001820",\n "MedGen:CN001989",\n "MedGen:CN004852"\n ],\n "observedGains": 1,\n "validated": true\n }\n ],\n "variants": [\n {\n "vid": "12-12026270-C-[chr21:36420865[C",\n "chromosome": "chr12",\n "begin": 12026270,\n "end": 12026270,\n "isStructuralVariant": true,\n "refAllele": "C",\n "altAllele": "[chr21:36420865[C",\n "variantType": "translocation_breakend",\n "transcripts": [\n {\n "transcript": "ENST00000396373.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "ENSG00000139083",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusion": {\n "intron": 5,\n "fusions": [\n {\n "hgvsc": "RUNX1{ENST00000437180.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 2\n },\n {\n "hgvsc": 
"RUNX1{ENST00000300305.3}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 1\n },\n {\n "hgvsc": "RUNX1{ENST00000482318.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 2\n },\n {\n "hgvsc": "RUNX1{ENST00000486278.2}:c.?_156195_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 2\n },\n {\n "hgvsc": "RUNX1{ENST00000455571.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 2\n },\n {\n "hgvsc": "RUNX1{ENST00000475045.2}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 11\n },\n {\n "hgvsc": "RUNX1{ENST00000416754.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 2\n }\n ]\n },\n "isCanonical": true,\n "proteinId": "ENSP00000379658.3"\n },\n {\n "transcript": "NM_001987.4",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "2120",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusion": {\n "intron": 5,\n "fusions": [\n {\n "hgvsc": "RUNX1{NM_001754.4}:c.1_58+274_ETV6{NM_001987.4}:c.1009+3367_1359",\n "intron": 2\n }\n ]\n },\n "isCanonical": true,\n "proteinId": "NP_001978.1"\n }\n ]\n }\n ]\n }\n')),(0,i.kt)("h4",{id:"consequences"},"Consequences"),(0,i.kt)("p",null,"When a gene fusion is identified, we add the following Sequence Ontology consequence:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{3}","{3}":!0},' "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n')),(0,i.kt)("h4",{id:"introns--exons"},"Introns & Exons"),(0,i.kt)("p",null,"In this section we describe all the pairwise gene fusions that obey the criteria outlined above. In the case of ",(0,i.kt)("inlineCode",{parentName:"p"},"ENST00000396373.4"),", there 7 other Ensembl transcripts that would produce a gene fusion. 
For ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4"),", there was only one transcript (",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4"),") that produce a gene fusion."),(0,i.kt)("p",null,"In each case, Nirvana outputs which intron or exon contained the breakpoint in both of the transcripts that form the gene fusion."),(0,i.kt)("h4",{id:"hgvs-coding-notation"},"HGVS coding notation"),(0,i.kt)("p",null,"Finally, Nirvana also describes the gene fusion using HGVS c. notation:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{3}","{3}":!0},' "fusions": [\n {\n "hgvsc": "RUNX1{NM_001754.4}:c.1_58+274_ETV6{NM_001987.4}:c.1009+3367_1359",\n "intron": 2\n }\n')),(0,i.kt)("p",null,"This means that gene fusion uses CDS positions 1-58 from ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4")," (RUNX1) and CDS positions 1009-1359 from ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4")," (ETV6). ",(0,i.kt)("inlineCode",{parentName:"p"},"1009+3367")," indicates that the fusion occurred 3367 bp within intron 2."))}d.isMDXComponent=!0},28267:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/etv6-runx1-fusion-ec8f4312c9aca496bde0d6e2b1bbd50d.svg"},32596:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/gene-fusions-fig2-1cce8ac31b00465c8d36bdc47ec3309e.svg"}}]); \ No newline at end of file diff --git a/assets/js/110895d8.783e449a.js b/assets/js/110895d8.783e449a.js deleted file mode 100644 index 84f98361..00000000 --- a/assets/js/110895d8.783e449a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5062],{3905:(e,t,r)=>{r.d(t,{Zo:()=>p,kt:()=>f});var n=r(67294);function a(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function o(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function c(e){for(var t=1;t=0||(a[r]=e[r]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(a[r]=e[r])}return a}var i=n.createContext({}),s=function(e){var t=n.useContext(i),r=t;return e&&(r="function"==typeof e?e(t):c(c({},t),e)),r},p=function(e){var t=s(e.components);return n.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var r=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=s(r),d=a,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||o;return r?n.createElement(f,c(c({ref:t},p),{},{components:r})):n.createElement(f,c({ref:t},p))}));function f(e,t){var r=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=r.length,c=new Array(o);c[0]=d;var l={};for(var i in t)hasOwnProperty.call(t,i)&&(l[i]=t[i]);l.originalType=e,l[u]="string"==typeof e?e:a,c[1]=l;for(var s=2;s{r.r(t),r.d(t,{contentTitle:()=>c,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>i});var n=r(87462),a=(r(67294),r(3905));const o={},c=void 0,l={unversionedId:"data-sources/revel-json",id:"version-3.14/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/revel-json.md",tags:[],version:"3.14",frontMatter:{}},i=[],s={toc:i},p="wrapper";function 
u(e){let{components:t,...r}=e;return(0,a.kt)(p,(0,n.Z)({},s,r,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"score"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/126a7ea2.6ebca69b.js b/assets/js/126a7ea2.6ebca69b.js deleted file mode 100644 index 8f74e1e6..00000000 --- a/assets/js/126a7ea2.6ebca69b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8001],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var c=r.createContext({}),u=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=u(e.components);return r.createElement(c.Provider,{value:t},e.children)},s="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return 
r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,l=e.originalType,c=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),s=u(n),d=a,f=s["".concat(c,".").concat(d)]||s[d]||m[d]||l;return n?r.createElement(f,o(o({ref:t},p),{},{components:n})):r.createElement(f,o({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var l=n.length,o=new Array(l);o[0]=d;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[s]="string"==typeof e?e:a,o[1]=i;for(var u=2;u{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>c});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/gme-json",id:"version-3.18/data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gme-json.md",tags:[],version:"3.18",frontMatter:{}},c=[],u={toc:c},p="wrapper";function s(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n 
"failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/1426.23516bfc.js b/assets/js/1426.23516bfc.js deleted file mode 100644 index 9397c142..00000000 --- a/assets/js/1426.23516bfc.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1426],{61426:(e,t,r)=>{function n(e,t){var r=void 0;return function(){for(var n=arguments.length,o=new Array(n),i=0;ipn});var a=function(){};function c(e){var t=e.item,r=e.items;return{index:t.__autocomplete_indexName,items:[t],positions:[1+r.findIndex((function(e){return e.objectID===t.objectID}))],queryID:t.__autocomplete_queryID,algoliaSource:["autocomplete"]}}function l(e,t){return function(e){if(Array.isArray(e))return e}(e)||function(e,t){var r=null==e?null:"undefined"!=typeof 
Symbol&&e[Symbol.iterator]||e["@@iterator"];if(null!=r){var n,o,i,a,c=[],l=!0,u=!1;try{if(i=(r=r.call(e)).next,0===t){if(Object(r)!==r)return;l=!1}else for(;!(l=(n=i.call(r)).done)&&(c.push(n.value),c.length!==t);l=!0);}catch(s){u=!0,o=s}finally{try{if(!l&&null!=r.return&&(a=r.return(),Object(a)!==a))return}finally{if(u)throw o}}return c}}(e,t)||function(e,t){if(!e)return;if("string"==typeof e)return u(e,t);var r=Object.prototype.toString.call(e).slice(8,-1);"Object"===r&&e.constructor&&(r=e.constructor.name);if("Map"===r||"Set"===r)return Array.from(e);if("Arguments"===r||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r))return u(e,t)}(e,t)||function(){throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function u(e,t){(null==t||t>e.length)&&(t=e.length);for(var r=0,n=new Array(t);re.length)&&(t=e.length);for(var r=0,n=new Array(t);r=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function y(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function h(e){for(var t=1;t=3||2===r&&n>=4||1===r&&n>=10);function i(t,r,n){if(o&&void 0!==n){var i=n[0].__autocomplete_algoliaCredentials,a={"X-Algolia-Application-Id":i.appId,"X-Algolia-API-Key":i.apiKey};e.apply(void 0,[t].concat(p(r),[{headers:a}]))}else e.apply(void 0,[t].concat(p(r)))}return{init:function(t,r){e("init",{appId:t,apiKey:r})},setUserToken:function(t){e("setUserToken",t)},clickedObjectIDsAfterSearch:function(){for(var e=arguments.length,t=new Array(e),r=0;r0&&i("clickedObjectIDsAfterSearch",g(t),t[0].items)},clickedObjectIDs:function(){for(var e=arguments.length,t=new 
Array(e),r=0;r0&&i("clickedObjectIDs",g(t),t[0].items)},clickedFilters:function(){for(var t=arguments.length,r=new Array(t),n=0;n0&&e.apply(void 0,["clickedFilters"].concat(r))},convertedObjectIDsAfterSearch:function(){for(var e=arguments.length,t=new Array(e),r=0;r0&&i("convertedObjectIDsAfterSearch",g(t),t[0].items)},convertedObjectIDs:function(){for(var e=arguments.length,t=new Array(e),r=0;r0&&i("convertedObjectIDs",g(t),t[0].items)},convertedFilters:function(){for(var t=arguments.length,r=new Array(t),n=0;n0&&e.apply(void 0,["convertedFilters"].concat(r))},viewedObjectIDs:function(){for(var e=arguments.length,t=new Array(e),r=0;r0&&t.reduce((function(e,t){var r=t.items,n=d(t,f);return[].concat(p(e),p(function(e){for(var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:20,r=[],n=0;n0&&e.apply(void 0,["viewedFilters"].concat(r))}}}function S(e){var t=e.items.reduce((function(e,t){var r;return e[t.__autocomplete_indexName]=(null!==(r=e[t.__autocomplete_indexName])&&void 0!==r?r:[]).concat(t),e}),{});return Object.keys(t).map((function(e){return{index:e,items:t[e],algoliaSource:["autocomplete"]}}))}function j(e){return e.objectID&&e.__autocomplete_indexName&&e.__autocomplete_queryID}function w(e){return w="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},w(e)}function E(e){return function(e){if(Array.isArray(e))return P(e)}(e)||function(e){if("undefined"!=typeof Symbol&&null!=e[Symbol.iterator]||null!=e["@@iterator"])return Array.from(e)}(e)||function(e,t){if(!e)return;if("string"==typeof e)return P(e,t);var r=Object.prototype.toString.call(e).slice(8,-1);"Object"===r&&e.constructor&&(r=e.constructor.name);if("Map"===r||"Set"===r)return Array.from(e);if("Arguments"===r||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r))return P(e,t)}(e)||function(){throw new TypeError("Invalid attempt to spread 
non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function P(e,t){(null==t||t>e.length)&&(t=e.length);for(var r=0,n=new Array(t);r0&&C({onItemsChange:o,items:r,insights:f,state:t}))}}),0);return{name:"aa.algoliaInsightsPlugin",subscribe:function(e){var t=e.setContext,r=e.onSelect,n=e.onActive;s("addAlgoliaAgent","insights-plugin"),t({algoliaInsightsPlugin:{__algoliaSearchParameters:{clickAnalytics:!0},insights:f}}),r((function(e){var t=e.item,r=e.state,n=e.event;j(t)&&l({state:r,event:n,insights:f,item:t,insightsEvents:[D({eventName:"Item Selected"},c({item:t,items:m.current}))]})})),n((function(e){var t=e.item,r=e.state,n=e.event;j(t)&&u({state:r,event:n,insights:f,item:t,insightsEvents:[D({eventName:"Item Active"},c({item:t,items:m.current}))]})}))},onStateChange:function(e){var t=e.state;p({state:t})},__autocomplete_pluginOptions:e}}function N(e){return N="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},N(e)}function T(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function q(e,t,r){return(t=function(e){var t=function(e,t){if("object"!==N(e)||null===e)return e;var r=e[Symbol.toPrimitive];if(void 0!==r){var n=r.call(e,t||"default");if("object"!==N(n))return n;throw new TypeError("@@toPrimitive must return a primitive value.")}return("string"===t?String:Number)(e)}(e,"string");return"symbol"===N(t)?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function R(e,t,r){var n,o=t.initialState;return{getState:function(){return o},dispatch:function(n,i){var a=function(e){for(var t=1;te.length)&&(t=e.length);for(var r=0,n=new 
Array(t);r0},reshape:function(e){return e.sources}},e),{},{id:null!==(r=e.id)&&void 0!==r?r:"autocomplete-".concat(V++),plugins:o,initialState:X({activeItemId:null,query:"",completion:null,collections:[],isOpen:!1,status:"idle",context:{}},e.initialState),onStateChange:function(t){var r;null===(r=e.onStateChange)||void 0===r||r.call(e,t),o.forEach((function(e){var r;return null===(r=e.onStateChange)||void 0===r?void 0:r.call(e,t)}))},onSubmit:function(t){var r;null===(r=e.onSubmit)||void 0===r||r.call(e,t),o.forEach((function(e){var r;return null===(r=e.onSubmit)||void 0===r?void 0:r.call(e,t)}))},onReset:function(t){var r;null===(r=e.onReset)||void 0===r||r.call(e,t),o.forEach((function(e){var r;return null===(r=e.onReset)||void 0===r?void 0:r.call(e,t)}))},getSources:function(r){return Promise.all([].concat(Q(o.map((function(e){return e.getSources}))),[e.getSources]).filter(Boolean).map((function(e){return function(e,t){var r=[];return Promise.resolve(e(t)).then((function(e){return Array.isArray(e),Promise.all(e.filter((function(e){return Boolean(e)})).map((function(e){if(e.sourceId,r.includes(e.sourceId))throw new Error("[Autocomplete] The `sourceId` ".concat(JSON.stringify(e.sourceId)," is not unique."));r.push(e.sourceId);var t={getItemInputValue:function(e){return e.state.query},getItemUrl:function(){},onSelect:function(e){(0,e.setIsOpen)(!1)},onActive:a,onResolve:a};Object.keys(t).forEach((function(e){t[e].__default=!0}));var n=$($({},t),e);return Promise.resolve(n)})))}))}(e,r)}))).then((function(e){return L(e)})).then((function(e){return e.map((function(e){return X(X({},e),{},{onSelect:function(r){e.onSelect(r),t.forEach((function(e){var t;return null===(t=e.onSelect)||void 0===t?void 0:t.call(e,r)}))},onActive:function(r){e.onActive(r),t.forEach((function(e){var t;return null===(t=e.onActive)||void 0===t?void 0:t.call(e,r)}))},onResolve:function(r){e.onResolve(r),t.forEach((function(e){var t;return null===(t=e.onResolve)||void 0===t?void 
0:t.call(e,r)}))}})}))}))},navigator:X({navigate:function(e){var t=e.itemUrl;n.location.assign(t)},navigateNewTab:function(e){var t=e.itemUrl,r=n.open(t,"_blank","noopener");null==r||r.focus()},navigateNewWindow:function(e){var t=e.itemUrl;n.open(t,"_blank","noopener")}},e.navigator)})}function te(e){return te="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},te(e)}function re(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function ne(e){for(var t=1;te.length)&&(t=e.length);for(var r=0,n=new Array(t);r=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}var Ie,De,Ae,ke=null,xe=(Ie=-1,De=-1,Ae=void 0,function(e){var t=++Ie;return Promise.resolve(e).then((function(e){return Ae&&t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}var Me=/((gt|sm)-|galaxy nexus)|samsung[- ]|samsungbrowser/i;function He(e){return He="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},He(e)}var Fe=["props","refresh","store"],Ue=["inputElement","formElement","panelElement"],Be=["inputElement"],Ve=["inputElement","maxLength"],Ke=["sourceIndex"],$e=["sourceIndex"],Je=["item","source","sourceIndex"];function ze(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function We(e){for(var t=1;t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function Ge(e){var t=e.props,r=e.refresh,n=e.store,o=Ze(e,Fe),i=function(e,t){return void 0!==t?"".concat(e,"-").concat(t):e};return{getEnvironmentProps:function(e){var r=e.inputElement,o=e.formElement,i=e.panelElement;function a(e){!n.getState().isOpen&&n.pendingRequests.isEmpty()||e.target===r||!1===[o,i].some((function(t){return r=t,n=e.target,r===n||r.contains(n);var r,n}))&&(n.dispatch("blur",null),t.debug||n.pendingRequests.cancelAll())}return We({onTouchStart:a,onMouseDown:a,onTouchMove:function(e){!1!==n.getState().isOpen&&r===t.environment.document.activeElement&&e.target!==r&&r.blur()}},Ze(e,Ue))},getRootProps:function(e){return We({role:"combobox","aria-expanded":n.getState().isOpen,"aria-haspopup":"listbox","aria-owns":n.getState().isOpen?"".concat(t.id,"-list"):void 0,"aria-labelledby":"".concat(t.id,"-label")},e)},getFormProps:function(e){e.inputElement;return We({action:"",noValidate:!0,role:"search",onSubmit:function(i){var a;i.preventDefault(),t.onSubmit(We({event:i,refresh:r,state:n.getState()},o)),n.dispatch("submit",null),null===(a=e.inputElement)||void 0===a||a.blur()},onReset:function(i){var a;i.preventDefault(),t.onReset(We({event:i,refresh:r,state:n.getState()},o)),n.dispatch("reset",null),null===(a=e.inputElement)||void 0===a||a.focus()}},Ze(e,Be))},getLabelProps:function(e){var r=e||{},n=r.sourceIndex,o=Ze(r,Ke);return We({htmlFor:"".concat(i(t.id,n),"-input"),id:"".concat(i(t.id,n),"-label")},o)},getInputProps:function(e){var i;function c(e){(t.openOnFocus||Boolean(n.getState().query))&&Ce(We({event:e,props:t,query:n.getState().completion||n.getState().query,refresh:r,store:n},o)),n.dispatch("focus",null)}var 
l=e||{},u=(l.inputElement,l.maxLength),s=void 0===u?512:u,f=Ze(l,Ve),m=ge(n.getState()),p=function(e){return Boolean(e&&e.match(Me))}((null===(i=t.environment.navigator)||void 0===i?void 0:i.userAgent)||""),v=null!=m&&m.itemUrl&&!p?"go":"search";return We({"aria-autocomplete":"both","aria-activedescendant":n.getState().isOpen&&null!==n.getState().activeItemId?"".concat(t.id,"-item-").concat(n.getState().activeItemId):void 0,"aria-controls":n.getState().isOpen?"".concat(t.id,"-list"):void 0,"aria-labelledby":"".concat(t.id,"-label"),value:n.getState().completion||n.getState().query,id:"".concat(t.id,"-input"),autoComplete:"off",autoCorrect:"off",autoCapitalize:"off",enterKeyHint:v,spellCheck:"false",autoFocus:t.autoFocus,placeholder:t.placeholder,maxLength:s,type:"search",onChange:function(e){Ce(We({event:e,props:t,query:e.currentTarget.value.slice(0,s),refresh:r,store:n},o))},onKeyDown:function(e){!function(e){var t=e.event,r=e.props,n=e.refresh,o=e.store,i=Le(e,Ne);if("ArrowUp"===t.key||"ArrowDown"===t.key){var a=function(){var e=r.environment.document.getElementById("".concat(r.id,"-item-").concat(o.getState().activeItemId));e&&(e.scrollIntoViewIfNeeded?e.scrollIntoViewIfNeeded(!1):e.scrollIntoView(!1))},c=function(){var e=ge(o.getState());if(null!==o.getState().activeItemId&&e){var r=e.item,a=e.itemInputValue,c=e.itemUrl,l=e.source;l.onActive(qe({event:t,item:r,itemInputValue:a,itemUrl:c,refresh:n,source:l,state:o.getState()},i))}};t.preventDefault(),!1===o.getState().isOpen&&(r.openOnFocus||Boolean(o.getState().query))?Ce(qe({event:t,props:r,query:o.getState().query,refresh:n,store:o},i)).then((function(){o.dispatch(t.key,{nextActiveItemId:r.defaultActiveItemId}),c(),setTimeout(a,0)})):(o.dispatch(t.key,{}),c(),a())}else if("Escape"===t.key)t.preventDefault(),o.dispatch(t.key,null),o.pendingRequests.cancelAll();else if("Tab"===t.key)o.dispatch("blur",null),o.pendingRequests.cancelAll();else 
if("Enter"===t.key){if(null===o.getState().activeItemId||o.getState().collections.every((function(e){return 0===e.items.length})))return void(r.debug||o.pendingRequests.cancelAll());t.preventDefault();var l=ge(o.getState()),u=l.item,s=l.itemInputValue,f=l.itemUrl,m=l.source;if(t.metaKey||t.ctrlKey)void 0!==f&&(m.onSelect(qe({event:t,item:u,itemInputValue:s,itemUrl:f,refresh:n,source:m,state:o.getState()},i)),r.navigator.navigateNewTab({itemUrl:f,item:u,state:o.getState()}));else if(t.shiftKey)void 0!==f&&(m.onSelect(qe({event:t,item:u,itemInputValue:s,itemUrl:f,refresh:n,source:m,state:o.getState()},i)),r.navigator.navigateNewWindow({itemUrl:f,item:u,state:o.getState()}));else if(t.altKey);else{if(void 0!==f)return m.onSelect(qe({event:t,item:u,itemInputValue:s,itemUrl:f,refresh:n,source:m,state:o.getState()},i)),void r.navigator.navigate({itemUrl:f,item:u,state:o.getState()});Ce(qe({event:t,nextState:{isOpen:!1},props:r,query:s,refresh:n,store:o},i)).then((function(){m.onSelect(qe({event:t,item:u,itemInputValue:s,itemUrl:f,refresh:n,source:m,state:o.getState()},i))}))}}}(We({event:e,props:t,refresh:r,store:n},o))},onFocus:c,onBlur:a,onClick:function(r){e.inputElement!==t.environment.document.activeElement||n.getState().isOpen||c(r)}},f)},getPanelProps:function(e){return We({onMouseDown:function(e){e.preventDefault()},onMouseLeave:function(){n.dispatch("mouseleave",null)}},e)},getListProps:function(e){var r=e||{},n=r.sourceIndex,o=Ze(r,$e);return We({role:"listbox","aria-labelledby":"".concat(i(t.id,n),"-label"),id:"".concat(i(t.id,n),"-list")},o)},getItemProps:function(e){var a=e.item,c=e.source,l=e.sourceIndex,u=Ze(e,Je);return We({id:"".concat(i(t.id,l),"-item-").concat(a.__autocomplete_id),role:"option","aria-selected":n.getState().activeItemId===a.__autocomplete_id,onMouseMove:function(e){if(a.__autocomplete_id!==n.getState().activeItemId){n.dispatch("mousemove",a.__autocomplete_id);var t=ge(n.getState());if(null!==n.getState().activeItemId&&t){var 
i=t.item,c=t.itemInputValue,l=t.itemUrl,u=t.source;u.onActive(We({event:e,item:i,itemInputValue:c,itemUrl:l,refresh:r,source:u,state:n.getState()},o))}}},onMouseDown:function(e){e.preventDefault()},onClick:function(e){var i=c.getItemInputValue({item:a,state:n.getState()}),l=c.getItemUrl({item:a,state:n.getState()});(l?Promise.resolve():Ce(We({event:e,nextState:{isOpen:!1},props:t,query:i,refresh:r,store:n},o))).then((function(){c.onSelect(We({event:e,item:a,itemInputValue:i,itemUrl:l,refresh:r,source:c,state:n.getState()},o))}))}},u)}}}var Xe=[{segment:"autocomplete-core",version:"1.9.3"}];function Ye(e){return Ye="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},Ye(e)}function et(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function tt(e){for(var t=1;t=r?null===n?null:0:o}function at(e){return at="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},at(e)}function ct(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function lt(e){for(var t=1;te.length)&&(t=e.length);for(var r=0,n=new Array(t);r=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function kt(e){var t=e.translations,r=void 0===t?{}:t,n=At(e,Pt),o=r.noResultsText,i=void 0===o?"No results for":o,a=r.suggestedQueryText,c=void 0===a?"Try searching 
for":a,l=r.reportMissingResultsText,u=void 0===l?"Believe this query should return results?":l,s=r.reportMissingResultsLinkText,f=void 0===s?"Let us know.":s,m=n.state.context.searchSuggestions;return yt.createElement("div",{className:"DocSearch-NoResults"},yt.createElement("div",{className:"DocSearch-Screen-Icon"},yt.createElement(Et,null)),yt.createElement("p",{className:"DocSearch-Title"},i,' "',yt.createElement("strong",null,n.state.query),'"'),m&&m.length>0&&yt.createElement("div",{className:"DocSearch-NoResults-Prefill-List"},yt.createElement("p",{className:"DocSearch-Help"},c,":"),yt.createElement("ul",null,m.slice(0,3).reduce((function(e,t){return[].concat(It(e),[yt.createElement("li",{key:t},yt.createElement("button",{className:"DocSearch-Prefill",key:t,type:"button",onClick:function(){n.setQuery(t.toLowerCase()+" "),n.refresh(),n.inputRef.current.focus()}},t))])}),[]))),n.getMissingResultsUrl&&yt.createElement("p",{className:"DocSearch-Help"},"".concat(u," "),yt.createElement("a",{href:n.getMissingResultsUrl({query:n.state.query}),target:"_blank",rel:"noopener noreferrer"},f)))}var xt=function(){return yt.createElement("svg",{width:"20",height:"20",viewBox:"0 0 20 20"},yt.createElement("path",{d:"M17 6v12c0 .52-.2 1-1 1H4c-.7 0-1-.33-1-1V2c0-.55.42-1 1-1h8l5 5zM14 8h-3.13c-.51 0-.87-.34-.87-.87V4",stroke:"currentColor",fill:"none",fillRule:"evenodd",strokeLinejoin:"round"}))};function Ct(e){switch(e.type){case"lvl1":return yt.createElement(xt,null);case"content":return yt.createElement(Nt,null);default:return yt.createElement(_t,null)}}function _t(){return yt.createElement("svg",{width:"20",height:"20",viewBox:"0 0 20 20"},yt.createElement("path",{d:"M13 13h4-4V8H7v5h6v4-4H7V8H3h4V3v5h6V3v5h4-4v5zm-6 0v4-4H3h4z",stroke:"currentColor",fill:"none",fillRule:"evenodd",strokeLinecap:"round",strokeLinejoin:"round"}))}function Nt(){return yt.createElement("svg",{width:"20",height:"20",viewBox:"0 0 20 20"},yt.createElement("path",{d:"M17 5H3h14zm0 5H3h14zm0 
5H3h14z",stroke:"currentColor",fill:"none",fillRule:"evenodd",strokeLinejoin:"round"}))}function Tt(){return yt.createElement("svg",{className:"DocSearch-Hit-Select-Icon",width:"20",height:"20",viewBox:"0 0 20 20"},yt.createElement("g",{stroke:"currentColor",fill:"none",fillRule:"evenodd",strokeLinecap:"round",strokeLinejoin:"round"},yt.createElement("path",{d:"M18 3v4c0 2-2 4-4 4H2"}),yt.createElement("path",{d:"M8 17l-6-6 6-6"})))}var qt=["hit","attribute","tagName"];function Rt(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function Lt(e){for(var t=1;t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function Ft(e,t){return t.split(".").reduce((function(e,t){return null!=e&&e[t]?e[t]:null}),e)}function Ut(e){var t=e.hit,r=e.attribute,n=e.tagName,o=void 0===n?"span":n,i=Ht(e,qt);return(0,yt.createElement)(o,Lt(Lt({},i),{},{dangerouslySetInnerHTML:{__html:Ft(t,"_snippetResult.".concat(r,".value"))||Ft(t,r)}}))}function Bt(e,t){return function(e){if(Array.isArray(e))return e}(e)||function(e,t){var r=null==e?null:"undefined"!=typeof Symbol&&e[Symbol.iterator]||e["@@iterator"];if(null==r)return;var n,o,i=[],a=!0,c=!1;try{for(r=r.call(e);!(a=(n=r.next()).done)&&(i.push(n.value),!t||i.length!==t);a=!0);}catch(l){c=!0,o=l}finally{try{a||null==r.return||r.return()}finally{if(c)throw o}}return i}(e,t)||function(e,t){if(!e)return;if("string"==typeof e)return Vt(e,t);var r=Object.prototype.toString.call(e).slice(8,-1);"Object"===r&&e.constructor&&(r=e.constructor.name);if("Map"===r||"Set"===r)return Array.from(e);if("Arguments"===r||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r))return Vt(e,t)}(e,t)||function(){throw new TypeError("Invalid attempt to destructure non-iterable 
instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function Vt(e,t){(null==t||t>e.length)&&(t=e.length);for(var r=0,n=new Array(t);r|<\/mark>)/g,Wt=RegExp(zt.source);function Qt(e){var t,r,n=e;if(!n.__docsearch_parent&&!e._highlightResult)return e.hierarchy.lvl0;var o=((n.__docsearch_parent?null===(t=n.__docsearch_parent)||void 0===t||null===(t=t._highlightResult)||void 0===t||null===(t=t.hierarchy)||void 0===t?void 0:t.lvl0:null===(r=e._highlightResult)||void 0===r||null===(r=r.hierarchy)||void 0===r?void 0:r.lvl0)||{}).value;return o&&Wt.test(o)?o.replace(zt,""):o}function Zt(){return Zt=Object.assign||function(e){for(var t=1;t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function or(e){var t=e.translations,r=void 0===t?{}:t,n=nr(e,tr),o=r.recentSearchesTitle,i=void 0===o?"Recent":o,a=r.noRecentSearchesText,c=void 0===a?"No recent searches":a,l=r.saveRecentSearchButtonTitle,u=void 0===l?"Save this search":l,s=r.removeRecentSearchButtonTitle,f=void 0===s?"Remove this search from history":s,m=r.favoriteSearchesTitle,p=void 0===m?"Favorite":m,v=r.removeFavoriteSearchButtonTitle,d=void 0===v?"Remove this search from favorites":v;return"idle"===n.state.status&&!1===n.hasCollections?n.disableUserPersonalization?null:yt.createElement("div",{className:"DocSearch-StartScreen"},yt.createElement("p",{className:"DocSearch-Help"},c)):!1===n.hasCollections?null:yt.createElement("div",{className:"DocSearch-Dropdown-Container"},yt.createElement($t,rr({},n,{title:i,collection:n.state.collections[0],renderIcon:function(){return yt.createElement("div",{className:"DocSearch-Hit-icon"},yt.createElement(Xt,null))},renderAction:function(e){var t=e.item,r=e.runFavoriteTransition,o=e.runDeleteTransition;return 
yt.createElement(yt.Fragment,null,yt.createElement("div",{className:"DocSearch-Hit-action"},yt.createElement("button",{className:"DocSearch-Hit-action-button",title:u,type:"submit",onClick:function(e){e.preventDefault(),e.stopPropagation(),r((function(){n.favoriteSearches.add(t),n.recentSearches.remove(t),n.refresh()}))}},yt.createElement(Yt,null))),yt.createElement("div",{className:"DocSearch-Hit-action"},yt.createElement("button",{className:"DocSearch-Hit-action-button",title:f,type:"submit",onClick:function(e){e.preventDefault(),e.stopPropagation(),o((function(){n.recentSearches.remove(t),n.refresh()}))}},yt.createElement(er,null))))}})),yt.createElement($t,rr({},n,{title:p,collection:n.state.collections[1],renderIcon:function(){return yt.createElement("div",{className:"DocSearch-Hit-icon"},yt.createElement(Yt,null))},renderAction:function(e){var t=e.item,r=e.runDeleteTransition;return yt.createElement("div",{className:"DocSearch-Hit-action"},yt.createElement("button",{className:"DocSearch-Hit-action-button",title:d,type:"submit",onClick:function(e){e.preventDefault(),e.stopPropagation(),r((function(){n.favoriteSearches.remove(t),n.refresh()}))}},yt.createElement(er,null)))}})))}var ir=["translations"];function ar(){return ar=Object.assign||function(e){for(var t=1;t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}var lr=yt.memo((function(e){var t=e.translations,r=void 0===t?{}:t,n=cr(e,ir);if("error"===n.state.status)return yt.createElement(wt,{translations:null==r?void 0:r.errorScreen});var o=n.state.collections.some((function(e){return e.items.length>0}));return n.state.query?!1===o?yt.createElement(kt,ar({},n,{translations:null==r?void 0:r.noResultsScreen})):yt.createElement(Gt,n):yt.createElement(or,ar({},n,{hasCollections:o,translations:null==r?void 
0:r.startScreen}))}),(function(e,t){return"loading"===t.state.status||"stalled"===t.state.status}));function ur(){return yt.createElement("svg",{viewBox:"0 0 38 38",stroke:"currentColor",strokeOpacity:".5"},yt.createElement("g",{fill:"none",fillRule:"evenodd"},yt.createElement("g",{transform:"translate(1 1)",strokeWidth:"2"},yt.createElement("circle",{strokeOpacity:".3",cx:"18",cy:"18",r:"18"}),yt.createElement("path",{d:"M36 18c0-9.94-8.06-18-18-18"},yt.createElement("animateTransform",{attributeName:"transform",type:"rotate",from:"0 18 18",to:"360 18 18",dur:"1s",repeatCount:"indefinite"})))))}var sr=r(20830),fr=["translations"];function mr(){return mr=Object.assign||function(e){for(var t=1;t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function vr(e){var t=e.translations,r=void 0===t?{}:t,n=pr(e,fr),o=r.resetButtonTitle,i=void 0===o?"Clear the query":o,a=r.resetButtonAriaLabel,c=void 0===a?"Clear the query":a,l=r.cancelButtonText,u=void 0===l?"Cancel":l,s=r.cancelButtonAriaLabel,f=void 0===s?"Cancel":s,m=n.getFormProps({inputElement:n.inputRef.current}).onReset;return 
yt.useEffect((function(){n.autoFocus&&n.inputRef.current&&n.inputRef.current.focus()}),[n.autoFocus,n.inputRef]),yt.useEffect((function(){n.isFromSelection&&n.inputRef.current&&n.inputRef.current.select()}),[n.isFromSelection,n.inputRef]),yt.createElement(yt.Fragment,null,yt.createElement("form",{className:"DocSearch-Form",onSubmit:function(e){e.preventDefault()},onReset:m},yt.createElement("label",mr({className:"DocSearch-MagnifierLabel"},n.getLabelProps()),yt.createElement(sr.W,null)),yt.createElement("div",{className:"DocSearch-LoadingIndicator"},yt.createElement(ur,null)),yt.createElement("input",mr({className:"DocSearch-Input",ref:n.inputRef},n.getInputProps({inputElement:n.inputRef.current,autoFocus:n.autoFocus,maxLength:ht}))),yt.createElement("button",{type:"reset",title:i,className:"DocSearch-Reset","aria-label":c,hidden:!n.state.query},yt.createElement(er,null))),yt.createElement("button",{className:"DocSearch-Cancel",type:"reset","aria-label":f,onClick:n.onClose},u))}var dr=["_highlightResult","_snippetResult"];function yr(e,t){if(null==e)return{};var r,n,o=function(e,t){if(null==e)return{};var r,n,o={},i=Object.keys(e);for(n=0;n=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function hr(e){return!1===function(){var e="__TEST_KEY__";try{return localStorage.setItem(e,""),localStorage.removeItem(e),!0}catch(t){return!1}}()?{setItem:function(){},getItem:function(){return[]}}:{setItem:function(t){return window.localStorage.setItem(e,JSON.stringify(t))},getItem:function(){var t=window.localStorage.getItem(e);return t?JSON.parse(t):[]}}}function br(e){var t=e.key,r=e.limit,n=void 0===r?5:r,o=hr(t),i=o.getItem().slice(0,n);return{add:function(e){var t=e,r=(t._highlightResult,t._snippetResult,yr(t,dr)),a=i.findIndex((function(e){return 
e.objectID===r.objectID}));a>-1&&i.splice(a,1),i.unshift(r),i=i.slice(0,n),o.setItem(i)},remove:function(e){i=i.filter((function(t){return t.objectID!==e.objectID})),o.setItem(i)},getAll:function(){return i}}}function gr(e){const t=`algoliasearch-client-js-${e.key}`;let r;const n=()=>(void 0===r&&(r=e.localStorage||window.localStorage),r),o=()=>JSON.parse(n().getItem(t)||"{}"),i=e=>{n().setItem(t,JSON.stringify(e))};return{get:(t,r,n={miss:()=>Promise.resolve()})=>Promise.resolve().then((()=>{(()=>{const t=e.timeToLive?1e3*e.timeToLive:null,r=o(),n=Object.fromEntries(Object.entries(r).filter((([,e])=>void 0!==e.timestamp)));if(i(n),!t)return;const a=Object.fromEntries(Object.entries(n).filter((([,e])=>{const r=(new Date).getTime();return!(e.timestamp+tPromise.all([e?e.value:r(),void 0!==e]))).then((([e,t])=>Promise.all([e,t||n.miss(e)]))).then((([e])=>e)),set:(e,r)=>Promise.resolve().then((()=>{const i=o();return i[JSON.stringify(e)]={timestamp:(new Date).getTime(),value:r},n().setItem(t,JSON.stringify(i)),r})),delete:e=>Promise.resolve().then((()=>{const r=o();delete r[JSON.stringify(e)],n().setItem(t,JSON.stringify(r))})),clear:()=>Promise.resolve().then((()=>{n().removeItem(t)}))}}function Or(e){const t=[...e.caches],r=t.shift();return void 0===r?{get:(e,t,r={miss:()=>Promise.resolve()})=>t().then((e=>Promise.all([e,r.miss(e)]))).then((([e])=>e)),set:(e,t)=>Promise.resolve(t),delete:e=>Promise.resolve(),clear:()=>Promise.resolve()}:{get:(e,n,o={miss:()=>Promise.resolve()})=>r.get(e,n,o).catch((()=>Or({caches:t}).get(e,n,o))),set:(e,n)=>r.set(e,n).catch((()=>Or({caches:t}).set(e,n))),delete:e=>r.delete(e).catch((()=>Or({caches:t}).delete(e))),clear:()=>r.clear().catch((()=>Or({caches:t}).clear()))}}function Sr(e={serializable:!0}){let t={};return{get(r,n,o={miss:()=>Promise.resolve()}){const i=JSON.stringify(r);if(i in t)return Promise.resolve(e.serializable?JSON.parse(t[i]):t[i]);const a=n(),c=o&&o.miss||(()=>Promise.resolve());return 
a.then((e=>c(e))).then((()=>a))},set:(r,n)=>(t[JSON.stringify(r)]=e.serializable?JSON.stringify(n):n,Promise.resolve(n)),delete:e=>(delete t[JSON.stringify(e)],Promise.resolve()),clear:()=>(t={},Promise.resolve())}}function jr(e){let t=e.length-1;for(;t>0;t--){const r=Math.floor(Math.random()*(t+1)),n=e[t];e[t]=e[r],e[r]=n}return e}function wr(e,t){return t?(Object.keys(t).forEach((r=>{e[r]=t[r](e)})),e):e}function Er(e,...t){let r=0;return e.replace(/%s/g,(()=>encodeURIComponent(t[r++])))}const Pr="4.20.0",Ir={WithinQueryParameters:0,WithinHeaders:1};function Dr(e,t){const r=e||{},n=r.data||{};return Object.keys(r).forEach((e=>{-1===["timeout","headers","queryParameters","data","cacheable"].indexOf(e)&&(n[e]=r[e])})),{data:Object.entries(n).length>0?n:void 0,timeout:r.timeout||t,headers:r.headers||{},queryParameters:r.queryParameters||{},cacheable:r.cacheable}}const Ar={Read:1,Write:2,Any:3},kr={Up:1,Down:2,Timeouted:3},xr=12e4;function Cr(e,t=kr.Up){return{...e,status:t,lastUpdate:Date.now()}}function _r(e){return"string"==typeof e?{protocol:"https",url:e,accept:Ar.Any}:{protocol:e.protocol||"https",url:e.url,accept:e.accept||Ar.Any}}const Nr={Delete:"DELETE",Get:"GET",Post:"POST",Put:"PUT"};function Tr(e,t){return Promise.all(t.map((t=>e.get(t,(()=>Promise.resolve(Cr(t))))))).then((e=>{const r=e.filter((e=>function(e){return e.status===kr.Up||Date.now()-e.lastUpdate>xr}(e))),n=e.filter((e=>function(e){return e.status===kr.Timeouted&&Date.now()-e.lastUpdate<=xr}(e))),o=[...r,...n];return{getTimeout:(e,t)=>(0===n.length&&0===e?1:n.length+3+e)*t,statelessHosts:o.length>0?o.map((e=>_r(e))):t}}))}const qr=(e,t)=>(e=>{const t=e.status;return e.isTimedOut||(({isTimedOut:e,status:t})=>!e&&0==~~t)(e)||2!=~~(t/100)&&4!=~~(t/100)})(e)?t.onRetry(e):(({status:e})=>2==~~(e/100))(e)?t.onSuccess(e):t.onFail(e);function Rr(e,t,r,n){const o=[],i=function(e,t){if(e.method===Nr.Get||void 0===e.data&&void 0===t.data)return;const 
r=Array.isArray(e.data)?e.data:{...e.data,...t.data};return JSON.stringify(r)}(r,n),a=function(e,t){const r={...e.headers,...t.headers},n={};return Object.keys(r).forEach((e=>{const t=r[e];n[e.toLowerCase()]=t})),n}(e,n),c=r.method,l=r.method!==Nr.Get?{}:{...r.data,...n.data},u={"x-algolia-agent":e.userAgent.value,...e.queryParameters,...l,...n.queryParameters};let s=0;const f=(t,l)=>{const m=t.pop();if(void 0===m)throw{name:"RetryError",message:"Unreachable hosts - your application id may be incorrect. If the error persists, contact support@algolia.com.",transporterStackTrace:Fr(o)};const p={data:i,headers:a,method:c,url:Mr(m,r.path,u),connectTimeout:l(s,e.timeouts.connect),responseTimeout:l(s,n.timeout)},v=e=>{const r={request:p,response:e,host:m,triesLeft:t.length};return o.push(r),r},d={onSuccess:e=>function(e){try{return JSON.parse(e.content)}catch(t){throw function(e,t){return{name:"DeserializationError",message:e,response:t}}(t.message,e)}}(e),onRetry(r){const n=v(r);return r.isTimedOut&&s++,Promise.all([e.logger.info("Retryable failure",Ur(n)),e.hostsCache.set(m,Cr(m,r.isTimedOut?kr.Timeouted:kr.Down))]).then((()=>f(t,l)))},onFail(e){throw v(e),function({content:e,status:t},r){let n=e;try{n=JSON.parse(e).message}catch(o){}return function(e,t,r){return{name:"ApiError",message:e,status:t,transporterStackTrace:r}}(n,t,r)}(e,Fr(o))}};return e.requester.send(p).then((e=>qr(e,d)))};return Tr(e.hostsCache,t).then((e=>f([...e.statelessHosts].reverse(),e.getTimeout)))}function Lr(e){const t={value:`Algolia for JavaScript (${e})`,add(e){const r=`; ${e.segment}${void 0!==e.version?` (${e.version})`:""}`;return-1===t.value.indexOf(r)&&(t.value=`${t.value}${r}`),t}};return t}function Mr(e,t,r){const n=Hr(r);let o=`${e.protocol}://${e.url}/${"/"===t.charAt(0)?t.substr(1):t}`;return n.length&&(o+=`?${n}`),o}function Hr(e){return Object.keys(e).map((t=>{return Er("%s=%s",t,(r=e[t],"[object Object]"===Object.prototype.toString.call(r)||"[object 
Array]"===Object.prototype.toString.call(r)?JSON.stringify(e[t]):e[t]));var r})).join("&")}function Fr(e){return e.map((e=>Ur(e)))}function Ur(e){const t=e.request.headers["x-algolia-api-key"]?{"x-algolia-api-key":"*****"}:{};return{...e,request:{...e.request,headers:{...e.request.headers,...t}}}}const Br=e=>{const t=e.appId,r=function(e,t,r){const n={"x-algolia-api-key":r,"x-algolia-application-id":t};return{headers:()=>e===Ir.WithinHeaders?n:{},queryParameters:()=>e===Ir.WithinQueryParameters?n:{}}}(void 0!==e.authMode?e.authMode:Ir.WithinHeaders,t,e.apiKey),n=function(e){const{hostsCache:t,logger:r,requester:n,requestsCache:o,responsesCache:i,timeouts:a,userAgent:c,hosts:l,queryParameters:u,headers:s}=e,f={hostsCache:t,logger:r,requester:n,requestsCache:o,responsesCache:i,timeouts:a,userAgent:c,headers:s,queryParameters:u,hosts:l.map((e=>_r(e))),read(e,t){const r=Dr(t,f.timeouts.read),n=()=>Rr(f,f.hosts.filter((e=>0!=(e.accept&Ar.Read))),e,r);if(!0!==(void 0!==r.cacheable?r.cacheable:e.cacheable))return n();const o={request:e,mappedRequestOptions:r,transporter:{queryParameters:f.queryParameters,headers:f.headers}};return f.responsesCache.get(o,(()=>f.requestsCache.get(o,(()=>f.requestsCache.set(o,n()).then((e=>Promise.all([f.requestsCache.delete(o),e])),(e=>Promise.all([f.requestsCache.delete(o),Promise.reject(e)]))).then((([e,t])=>t))))),{miss:e=>f.responsesCache.set(o,e)})},write:(e,t)=>Rr(f,f.hosts.filter((e=>0!=(e.accept&Ar.Write))),e,Dr(t,f.timeouts.write))};return 
f}({hosts:[{url:`${t}-dsn.algolia.net`,accept:Ar.Read},{url:`${t}.algolia.net`,accept:Ar.Write}].concat(jr([{url:`${t}-1.algolianet.com`},{url:`${t}-2.algolianet.com`},{url:`${t}-3.algolianet.com`}])),...e,headers:{...r.headers(),"content-type":"application/x-www-form-urlencoded",...e.headers},queryParameters:{...r.queryParameters(),...e.queryParameters}}),o={transporter:n,appId:t,addAlgoliaAgent(e,t){n.userAgent.add({segment:e,version:t})},clearCache:()=>Promise.all([n.requestsCache.clear(),n.responsesCache.clear()]).then((()=>{}))};return wr(o,e.methods)},Vr=e=>(t,r)=>t.method===Nr.Get?e.transporter.read(t,r):e.transporter.write(t,r),Kr=e=>(t,r={})=>wr({transporter:e.transporter,appId:e.appId,indexName:t},r.methods),$r=e=>(t,r)=>{const n=t.map((e=>({...e,params:Hr(e.params||{})})));return e.transporter.read({method:Nr.Post,path:"1/indexes/*/queries",data:{requests:n},cacheable:!0},r)},Jr=e=>(t,r)=>Promise.all(t.map((t=>{const{facetName:n,facetQuery:o,...i}=t.params;return Kr(e)(t.indexName,{methods:{searchForFacetValues:Qr}}).searchForFacetValues(n,o,{...r,...i})}))),zr=e=>(t,r,n)=>e.transporter.read({method:Nr.Post,path:Er("1/answers/%s/prediction",e.indexName),data:{query:t,queryLanguages:r},cacheable:!0},n),Wr=e=>(t,r)=>e.transporter.read({method:Nr.Post,path:Er("1/indexes/%s/query",e.indexName),data:{query:t},cacheable:!0},r),Qr=e=>(t,r,n)=>e.transporter.read({method:Nr.Post,path:Er("1/indexes/%s/facets/%s/query",e.indexName,t),data:{facetQuery:r},cacheable:!0},n),Zr={Debug:1,Info:2,Error:3};function Gr(e,t,r){const n={appId:e,apiKey:t,timeouts:{connect:1,read:2,write:30},requester:{send:e=>new Promise((t=>{const r=new XMLHttpRequest;r.open(e.method,e.url,!0),Object.keys(e.headers).forEach((t=>r.setRequestHeader(t,e.headers[t])));const n=(e,n)=>setTimeout((()=>{r.abort(),t({status:0,content:n,isTimedOut:!0})}),1e3*e),o=n(e.connectTimeout,"Connection timeout");let i;r.onreadystatechange=()=>{r.readyState>r.OPENED&&void 
0===i&&(clearTimeout(o),i=n(e.responseTimeout,"Socket timeout"))},r.onerror=()=>{0===r.status&&(clearTimeout(o),clearTimeout(i),t({content:r.responseText||"Network request failed",status:r.status,isTimedOut:!1}))},r.onload=()=>{clearTimeout(o),clearTimeout(i),t({content:r.responseText,status:r.status,isTimedOut:!1})},r.send(e.data)}))},logger:(o=Zr.Error,{debug:(e,t)=>(Zr.Debug>=o&&console.debug(e,t),Promise.resolve()),info:(e,t)=>(Zr.Info>=o&&console.info(e,t),Promise.resolve()),error:(e,t)=>(console.error(e,t),Promise.resolve())}),responsesCache:Sr(),requestsCache:Sr({serializable:!1}),hostsCache:Or({caches:[gr({key:`${Pr}-${e}`}),Sr()]}),userAgent:Lr(Pr).add({segment:"Browser",version:"lite"}),authMode:Ir.WithinQueryParameters};var o;return Br({...n,...r,methods:{search:$r,searchForFacetValues:Jr,multipleQueries:$r,multipleSearchForFacetValues:Jr,customRequest:Vr,initIndex:e=>t=>Kr(e)(t,{methods:{search:Wr,searchForFacetValues:Qr,findAnswers:zr}})}})}Gr.version=Pr;const Xr=Gr;var Yr="3.5.2";function en(){}function tn(e){return e}function rn(e){return 1===e.button||e.altKey||e.ctrlKey||e.metaKey||e.shiftKey}function nn(e,t,r){return e.reduce((function(e,n){var o=t(n);return e.hasOwnProperty(o)||(e[o]=[]),e[o].length<(r||5)&&e[o].push(n),e}),{})}var on=["footer","searchBox"];function an(){return an=Object.assign||function(e){for(var t=1;te.length)&&(t=e.length);for(var r=0,n=new Array(t);r=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function pn(e){var t=e.appId,r=e.apiKey,n=e.indexName,o=e.placeholder,i=void 0===o?"Search docs":o,a=e.searchParameters,c=e.maxResultsPerGroup,l=e.onClose,u=void 0===l?en:l,s=e.transformItems,f=void 0===s?tn:s,m=e.hitComponent,p=void 0===m?St:m,v=e.resultsFooterComponent,d=void 0===v?function(){return null}:v,y=e.navigator,h=e.initialScrollY,b=void 0===h?0:h,g=e.transformSearchClient,O=void 
0===g?tn:g,S=e.disableUserPersonalization,j=void 0!==S&&S,w=e.initialQuery,E=void 0===w?"":w,P=e.translations,I=void 0===P?{}:P,D=e.getMissingResultsUrl,A=e.insights,k=void 0!==A&&A,x=I.footer,C=I.searchBox,_=mn(I,on),N=sn(yt.useState({query:"",collections:[],completion:null,context:{},isOpen:!1,activeItemId:null,status:"idle"}),2),T=N[0],q=N[1],R=yt.useRef(null),L=yt.useRef(null),M=yt.useRef(null),H=yt.useRef(null),F=yt.useRef(null),U=yt.useRef(10),B=yt.useRef("undefined"!=typeof window?window.getSelection().toString().slice(0,ht):"").current,V=yt.useRef(E||B).current,K=function(e,t,r){return yt.useMemo((function(){var n=Xr(e,t);return n.addAlgoliaAgent("docsearch",Yr),!1===/docsearch.js \(.*\)/.test(n.transporter.userAgent.value)&&n.addAlgoliaAgent("docsearch-react",Yr),r(n)}),[e,t,r])}(t,r,O),$=yt.useRef(br({key:"__DOCSEARCH_FAVORITE_SEARCHES__".concat(n),limit:10})).current,J=yt.useRef(br({key:"__DOCSEARCH_RECENT_SEARCHES__".concat(n),limit:0===$.getAll().length?7:4})).current,z=yt.useCallback((function(e){if(!j){var t="content"===e.type?e.__docsearch_parent:e;t&&-1===$.getAll().findIndex((function(e){return e.objectID===t.objectID}))&&J.add(t)}}),[$,J,j]),W=yt.useCallback((function(e){if(T.context.algoliaInsightsPlugin&&e.__autocomplete_id){var t=e,r={eventName:"Item Selected",index:t.__autocomplete_indexName,items:[t],positions:[e.__autocomplete_id],queryID:t.__autocomplete_queryID};T.context.algoliaInsightsPlugin.insights.clickedObjectIDsAfterSearch(r)}}),[T.context.algoliaInsightsPlugin]),Q=yt.useMemo((function(){return dt({id:"docsearch",defaultActiveItemId:0,placeholder:i,openOnFocus:!0,initialState:{query:V,context:{searchSuggestions:[]}},insights:k,navigator:y,onStateChange:function(e){q(e.state)},getSources:function(e){var o=e.query,i=e.state,l=e.setContext,s=e.setStatus;if(!o)return j?[]:[{sourceId:"recentSearches",onSelect:function(e){var t=e.item,r=e.event;z(t),rn(r)||u()},getItemUrl:function(e){return e.item.url},getItems:function(){return 
J.getAll()}},{sourceId:"favoriteSearches",onSelect:function(e){var t=e.item,r=e.event;z(t),rn(r)||u()},getItemUrl:function(e){return e.item.url},getItems:function(){return $.getAll()}}];var m=Boolean(k);return K.search([{query:o,indexName:n,params:ln({attributesToRetrieve:["hierarchy.lvl0","hierarchy.lvl1","hierarchy.lvl2","hierarchy.lvl3","hierarchy.lvl4","hierarchy.lvl5","hierarchy.lvl6","content","type","url"],attributesToSnippet:["hierarchy.lvl1:".concat(U.current),"hierarchy.lvl2:".concat(U.current),"hierarchy.lvl3:".concat(U.current),"hierarchy.lvl4:".concat(U.current),"hierarchy.lvl5:".concat(U.current),"hierarchy.lvl6:".concat(U.current),"content:".concat(U.current)],snippetEllipsisText:"\u2026",highlightPreTag:"",highlightPostTag:"",hitsPerPage:20,clickAnalytics:m},a)}]).catch((function(e){throw"RetryError"===e.name&&s("error"),e})).then((function(e){var o=e.results[0],a=o.hits,s=o.nbHits,p=nn(a,(function(e){return Qt(e)}),c);i.context.searchSuggestions.length0&&(X(),F.current&&F.current.focus())}),[V,X]),yt.useEffect((function(){function e(){if(L.current){var e=.01*window.innerHeight;L.current.style.setProperty("--docsearch-vh","".concat(e,"px"))}}return e(),window.addEventListener("resize",e),function(){window.removeEventListener("resize",e)}}),[]),yt.createElement("div",an({ref:R},G({"aria-expanded":!0}),{className:["DocSearch","DocSearch-Container","stalled"===T.status&&"DocSearch-Container--Stalled","error"===T.status&&"DocSearch-Container--Errored"].filter(Boolean).join(" 
"),role:"button",tabIndex:0,onMouseDown:function(e){e.target===e.currentTarget&&u()}}),yt.createElement("div",{className:"DocSearch-Modal",ref:L},yt.createElement("header",{className:"DocSearch-SearchBar",ref:M},yt.createElement(vr,an({},Q,{state:T,autoFocus:0===V.length,inputRef:F,isFromSelection:Boolean(V)&&V===B,translations:C,onClose:u}))),yt.createElement("div",{className:"DocSearch-Dropdown",ref:H},yt.createElement(lr,an({},Q,{indexName:n,state:T,hitComponent:p,resultsFooterComponent:d,disableUserPersonalization:j,recentSearches:J,favoriteSearches:$,inputRef:F,translations:_,getMissingResultsUrl:D,onItemClick:function(e,t){W(e),z(e),rn(t)||u()}}))),yt.createElement("footer",{className:"DocSearch-Footer"},yt.createElement(Ot,{translations:x}))))}}}]); \ No newline at end of file diff --git a/assets/js/1426.36c44621.js b/assets/js/1426.36c44621.js new file mode 100644 index 00000000..7b9ef0c6 --- /dev/null +++ b/assets/js/1426.36c44621.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1426],{1426:(e,t,r)=>{function n(e,t){var r=void 0;return function(){for(var n=arguments.length,o=new Array(n),i=0;ipn});var a=function(){};function c(e){var t=e.item,r=e.items;return{index:t.__autocomplete_indexName,items:[t],positions:[1+r.findIndex((function(e){return e.objectID===t.objectID}))],queryID:t.__autocomplete_queryID,algoliaSource:["autocomplete"]}}function l(e,t){return function(e){if(Array.isArray(e))return e}(e)||function(e,t){var r=null==e?null:"undefined"!=typeof Symbol&&e[Symbol.iterator]||e["@@iterator"];if(null!=r){var n,o,i,a,c=[],l=!0,u=!1;try{if(i=(r=r.call(e)).next,0===t){if(Object(r)!==r)return;l=!1}else for(;!(l=(n=i.call(r)).done)&&(c.push(n.value),c.length!==t);l=!0);}catch(s){u=!0,o=s}finally{try{if(!l&&null!=r.return&&(a=r.return(),Object(a)!==a))return}finally{if(u)throw o}}return c}}(e,t)||function(e,t){if(!e)return;if("string"==typeof e)return u(e,t);var 
r=Object.prototype.toString.call(e).slice(8,-1);"Object"===r&&e.constructor&&(r=e.constructor.name);if("Map"===r||"Set"===r)return Array.from(e);if("Arguments"===r||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r))return u(e,t)}(e,t)||function(){throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function u(e,t){(null==t||t>e.length)&&(t=e.length);for(var r=0,n=new Array(t);re.length)&&(t=e.length);for(var r=0,n=new Array(t);r=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function y(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function h(e){for(var t=1;t=3||2===r&&n>=4||1===r&&n>=10);function i(t,r,n){if(o&&void 0!==n){var i=n[0].__autocomplete_algoliaCredentials,a={"X-Algolia-Application-Id":i.appId,"X-Algolia-API-Key":i.apiKey};e.apply(void 0,[t].concat(p(r),[{headers:a}]))}else e.apply(void 0,[t].concat(p(r)))}return{init:function(t,r){e("init",{appId:t,apiKey:r})},setUserToken:function(t){e("setUserToken",t)},clickedObjectIDsAfterSearch:function(){for(var e=arguments.length,t=new Array(e),r=0;r0&&i("clickedObjectIDsAfterSearch",g(t),t[0].items)},clickedObjectIDs:function(){for(var e=arguments.length,t=new Array(e),r=0;r0&&i("clickedObjectIDs",g(t),t[0].items)},clickedFilters:function(){for(var t=arguments.length,r=new Array(t),n=0;n0&&e.apply(void 0,["clickedFilters"].concat(r))},convertedObjectIDsAfterSearch:function(){for(var e=arguments.length,t=new Array(e),r=0;r0&&i("convertedObjectIDsAfterSearch",g(t),t[0].items)},convertedObjectIDs:function(){for(var e=arguments.length,t=new 
Array(e),r=0;r0&&i("convertedObjectIDs",g(t),t[0].items)},convertedFilters:function(){for(var t=arguments.length,r=new Array(t),n=0;n0&&e.apply(void 0,["convertedFilters"].concat(r))},viewedObjectIDs:function(){for(var e=arguments.length,t=new Array(e),r=0;r0&&t.reduce((function(e,t){var r=t.items,n=d(t,f);return[].concat(p(e),p(function(e){for(var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:20,r=[],n=0;n0&&e.apply(void 0,["viewedFilters"].concat(r))}}}function S(e){var t=e.items.reduce((function(e,t){var r;return e[t.__autocomplete_indexName]=(null!==(r=e[t.__autocomplete_indexName])&&void 0!==r?r:[]).concat(t),e}),{});return Object.keys(t).map((function(e){return{index:e,items:t[e],algoliaSource:["autocomplete"]}}))}function j(e){return e.objectID&&e.__autocomplete_indexName&&e.__autocomplete_queryID}function w(e){return w="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},w(e)}function E(e){return function(e){if(Array.isArray(e))return P(e)}(e)||function(e){if("undefined"!=typeof Symbol&&null!=e[Symbol.iterator]||null!=e["@@iterator"])return Array.from(e)}(e)||function(e,t){if(!e)return;if("string"==typeof e)return P(e,t);var r=Object.prototype.toString.call(e).slice(8,-1);"Object"===r&&e.constructor&&(r=e.constructor.name);if("Map"===r||"Set"===r)return Array.from(e);if("Arguments"===r||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r))return P(e,t)}(e)||function(){throw new TypeError("Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function P(e,t){(null==t||t>e.length)&&(t=e.length);for(var r=0,n=new Array(t);r0&&C({onItemsChange:o,items:r,insights:f,state:t}))}}),0);return{name:"aa.algoliaInsightsPlugin",subscribe:function(e){var 
t=e.setContext,r=e.onSelect,n=e.onActive;s("addAlgoliaAgent","insights-plugin"),t({algoliaInsightsPlugin:{__algoliaSearchParameters:{clickAnalytics:!0},insights:f}}),r((function(e){var t=e.item,r=e.state,n=e.event;j(t)&&l({state:r,event:n,insights:f,item:t,insightsEvents:[D({eventName:"Item Selected"},c({item:t,items:m.current}))]})})),n((function(e){var t=e.item,r=e.state,n=e.event;j(t)&&u({state:r,event:n,insights:f,item:t,insightsEvents:[D({eventName:"Item Active"},c({item:t,items:m.current}))]})}))},onStateChange:function(e){var t=e.state;p({state:t})},__autocomplete_pluginOptions:e}}function N(e){return N="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},N(e)}function T(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function q(e,t,r){return(t=function(e){var t=function(e,t){if("object"!==N(e)||null===e)return e;var r=e[Symbol.toPrimitive];if(void 0!==r){var n=r.call(e,t||"default");if("object"!==N(n))return n;throw new TypeError("@@toPrimitive must return a primitive value.")}return("string"===t?String:Number)(e)}(e,"string");return"symbol"===N(t)?t:String(t)}(t))in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function R(e,t,r){var n,o=t.initialState;return{getState:function(){return o},dispatch:function(n,i){var a=function(e){for(var t=1;te.length)&&(t=e.length);for(var r=0,n=new Array(t);r0},reshape:function(e){return e.sources}},e),{},{id:null!==(r=e.id)&&void 0!==r?r:"autocomplete-".concat(V++),plugins:o,initialState:X({activeItemId:null,query:"",completion:null,collections:[],isOpen:!1,status:"idle",context:{}},e.initialState),onStateChange:function(t){var r;null===(r=e.onStateChange)||void 
0===r||r.call(e,t),o.forEach((function(e){var r;return null===(r=e.onStateChange)||void 0===r?void 0:r.call(e,t)}))},onSubmit:function(t){var r;null===(r=e.onSubmit)||void 0===r||r.call(e,t),o.forEach((function(e){var r;return null===(r=e.onSubmit)||void 0===r?void 0:r.call(e,t)}))},onReset:function(t){var r;null===(r=e.onReset)||void 0===r||r.call(e,t),o.forEach((function(e){var r;return null===(r=e.onReset)||void 0===r?void 0:r.call(e,t)}))},getSources:function(r){return Promise.all([].concat(Q(o.map((function(e){return e.getSources}))),[e.getSources]).filter(Boolean).map((function(e){return function(e,t){var r=[];return Promise.resolve(e(t)).then((function(e){return Array.isArray(e),Promise.all(e.filter((function(e){return Boolean(e)})).map((function(e){if(e.sourceId,r.includes(e.sourceId))throw new Error("[Autocomplete] The `sourceId` ".concat(JSON.stringify(e.sourceId)," is not unique."));r.push(e.sourceId);var t={getItemInputValue:function(e){return e.state.query},getItemUrl:function(){},onSelect:function(e){(0,e.setIsOpen)(!1)},onActive:a,onResolve:a};Object.keys(t).forEach((function(e){t[e].__default=!0}));var n=$($({},t),e);return Promise.resolve(n)})))}))}(e,r)}))).then((function(e){return L(e)})).then((function(e){return e.map((function(e){return X(X({},e),{},{onSelect:function(r){e.onSelect(r),t.forEach((function(e){var t;return null===(t=e.onSelect)||void 0===t?void 0:t.call(e,r)}))},onActive:function(r){e.onActive(r),t.forEach((function(e){var t;return null===(t=e.onActive)||void 0===t?void 0:t.call(e,r)}))},onResolve:function(r){e.onResolve(r),t.forEach((function(e){var t;return null===(t=e.onResolve)||void 0===t?void 0:t.call(e,r)}))}})}))}))},navigator:X({navigate:function(e){var t=e.itemUrl;n.location.assign(t)},navigateNewTab:function(e){var t=e.itemUrl,r=n.open(t,"_blank","noopener");null==r||r.focus()},navigateNewWindow:function(e){var t=e.itemUrl;n.open(t,"_blank","noopener")}},e.navigator)})}function te(e){return te="function"==typeof 
Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},te(e)}function re(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function ne(e){for(var t=1;te.length)&&(t=e.length);for(var r=0,n=new Array(t);r=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}var Ie,De,Ae,ke=null,xe=(Ie=-1,De=-1,Ae=void 0,function(e){var t=++Ie;return Promise.resolve(e).then((function(e){return Ae&&t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}var Me=/((gt|sm)-|galaxy nexus)|samsung[- ]|samsungbrowser/i;function He(e){return He="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},He(e)}var Fe=["props","refresh","store"],Ue=["inputElement","formElement","panelElement"],Be=["inputElement"],Ve=["inputElement","maxLength"],Ke=["sourceIndex"],$e=["sourceIndex"],Je=["item","source","sourceIndex"];function ze(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function We(e){for(var t=1;t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function Ge(e){var 
t=e.props,r=e.refresh,n=e.store,o=Ze(e,Fe),i=function(e,t){return void 0!==t?"".concat(e,"-").concat(t):e};return{getEnvironmentProps:function(e){var r=e.inputElement,o=e.formElement,i=e.panelElement;function a(e){!n.getState().isOpen&&n.pendingRequests.isEmpty()||e.target===r||!1===[o,i].some((function(t){return r=t,n=e.target,r===n||r.contains(n);var r,n}))&&(n.dispatch("blur",null),t.debug||n.pendingRequests.cancelAll())}return We({onTouchStart:a,onMouseDown:a,onTouchMove:function(e){!1!==n.getState().isOpen&&r===t.environment.document.activeElement&&e.target!==r&&r.blur()}},Ze(e,Ue))},getRootProps:function(e){return We({role:"combobox","aria-expanded":n.getState().isOpen,"aria-haspopup":"listbox","aria-owns":n.getState().isOpen?"".concat(t.id,"-list"):void 0,"aria-labelledby":"".concat(t.id,"-label")},e)},getFormProps:function(e){e.inputElement;return We({action:"",noValidate:!0,role:"search",onSubmit:function(i){var a;i.preventDefault(),t.onSubmit(We({event:i,refresh:r,state:n.getState()},o)),n.dispatch("submit",null),null===(a=e.inputElement)||void 0===a||a.blur()},onReset:function(i){var a;i.preventDefault(),t.onReset(We({event:i,refresh:r,state:n.getState()},o)),n.dispatch("reset",null),null===(a=e.inputElement)||void 0===a||a.focus()}},Ze(e,Be))},getLabelProps:function(e){var r=e||{},n=r.sourceIndex,o=Ze(r,Ke);return We({htmlFor:"".concat(i(t.id,n),"-input"),id:"".concat(i(t.id,n),"-label")},o)},getInputProps:function(e){var i;function c(e){(t.openOnFocus||Boolean(n.getState().query))&&Ce(We({event:e,props:t,query:n.getState().completion||n.getState().query,refresh:r,store:n},o)),n.dispatch("focus",null)}var l=e||{},u=(l.inputElement,l.maxLength),s=void 0===u?512:u,f=Ze(l,Ve),m=ge(n.getState()),p=function(e){return Boolean(e&&e.match(Me))}((null===(i=t.environment.navigator)||void 0===i?void 0:i.userAgent)||""),v=null!=m&&m.itemUrl&&!p?"go":"search";return 
We({"aria-autocomplete":"both","aria-activedescendant":n.getState().isOpen&&null!==n.getState().activeItemId?"".concat(t.id,"-item-").concat(n.getState().activeItemId):void 0,"aria-controls":n.getState().isOpen?"".concat(t.id,"-list"):void 0,"aria-labelledby":"".concat(t.id,"-label"),value:n.getState().completion||n.getState().query,id:"".concat(t.id,"-input"),autoComplete:"off",autoCorrect:"off",autoCapitalize:"off",enterKeyHint:v,spellCheck:"false",autoFocus:t.autoFocus,placeholder:t.placeholder,maxLength:s,type:"search",onChange:function(e){Ce(We({event:e,props:t,query:e.currentTarget.value.slice(0,s),refresh:r,store:n},o))},onKeyDown:function(e){!function(e){var t=e.event,r=e.props,n=e.refresh,o=e.store,i=Le(e,Ne);if("ArrowUp"===t.key||"ArrowDown"===t.key){var a=function(){var e=r.environment.document.getElementById("".concat(r.id,"-item-").concat(o.getState().activeItemId));e&&(e.scrollIntoViewIfNeeded?e.scrollIntoViewIfNeeded(!1):e.scrollIntoView(!1))},c=function(){var e=ge(o.getState());if(null!==o.getState().activeItemId&&e){var r=e.item,a=e.itemInputValue,c=e.itemUrl,l=e.source;l.onActive(qe({event:t,item:r,itemInputValue:a,itemUrl:c,refresh:n,source:l,state:o.getState()},i))}};t.preventDefault(),!1===o.getState().isOpen&&(r.openOnFocus||Boolean(o.getState().query))?Ce(qe({event:t,props:r,query:o.getState().query,refresh:n,store:o},i)).then((function(){o.dispatch(t.key,{nextActiveItemId:r.defaultActiveItemId}),c(),setTimeout(a,0)})):(o.dispatch(t.key,{}),c(),a())}else if("Escape"===t.key)t.preventDefault(),o.dispatch(t.key,null),o.pendingRequests.cancelAll();else if("Tab"===t.key)o.dispatch("blur",null),o.pendingRequests.cancelAll();else if("Enter"===t.key){if(null===o.getState().activeItemId||o.getState().collections.every((function(e){return 0===e.items.length})))return void(r.debug||o.pendingRequests.cancelAll());t.preventDefault();var l=ge(o.getState()),u=l.item,s=l.itemInputValue,f=l.itemUrl,m=l.source;if(t.metaKey||t.ctrlKey)void 
0!==f&&(m.onSelect(qe({event:t,item:u,itemInputValue:s,itemUrl:f,refresh:n,source:m,state:o.getState()},i)),r.navigator.navigateNewTab({itemUrl:f,item:u,state:o.getState()}));else if(t.shiftKey)void 0!==f&&(m.onSelect(qe({event:t,item:u,itemInputValue:s,itemUrl:f,refresh:n,source:m,state:o.getState()},i)),r.navigator.navigateNewWindow({itemUrl:f,item:u,state:o.getState()}));else if(t.altKey);else{if(void 0!==f)return m.onSelect(qe({event:t,item:u,itemInputValue:s,itemUrl:f,refresh:n,source:m,state:o.getState()},i)),void r.navigator.navigate({itemUrl:f,item:u,state:o.getState()});Ce(qe({event:t,nextState:{isOpen:!1},props:r,query:s,refresh:n,store:o},i)).then((function(){m.onSelect(qe({event:t,item:u,itemInputValue:s,itemUrl:f,refresh:n,source:m,state:o.getState()},i))}))}}}(We({event:e,props:t,refresh:r,store:n},o))},onFocus:c,onBlur:a,onClick:function(r){e.inputElement!==t.environment.document.activeElement||n.getState().isOpen||c(r)}},f)},getPanelProps:function(e){return We({onMouseDown:function(e){e.preventDefault()},onMouseLeave:function(){n.dispatch("mouseleave",null)}},e)},getListProps:function(e){var r=e||{},n=r.sourceIndex,o=Ze(r,$e);return We({role:"listbox","aria-labelledby":"".concat(i(t.id,n),"-label"),id:"".concat(i(t.id,n),"-list")},o)},getItemProps:function(e){var a=e.item,c=e.source,l=e.sourceIndex,u=Ze(e,Je);return We({id:"".concat(i(t.id,l),"-item-").concat(a.__autocomplete_id),role:"option","aria-selected":n.getState().activeItemId===a.__autocomplete_id,onMouseMove:function(e){if(a.__autocomplete_id!==n.getState().activeItemId){n.dispatch("mousemove",a.__autocomplete_id);var t=ge(n.getState());if(null!==n.getState().activeItemId&&t){var i=t.item,c=t.itemInputValue,l=t.itemUrl,u=t.source;u.onActive(We({event:e,item:i,itemInputValue:c,itemUrl:l,refresh:r,source:u,state:n.getState()},o))}}},onMouseDown:function(e){e.preventDefault()},onClick:function(e){var 
i=c.getItemInputValue({item:a,state:n.getState()}),l=c.getItemUrl({item:a,state:n.getState()});(l?Promise.resolve():Ce(We({event:e,nextState:{isOpen:!1},props:t,query:i,refresh:r,store:n},o))).then((function(){c.onSelect(We({event:e,item:a,itemInputValue:i,itemUrl:l,refresh:r,source:c,state:n.getState()},o))}))}},u)}}}var Xe=[{segment:"autocomplete-core",version:"1.9.3"}];function Ye(e){return Ye="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},Ye(e)}function et(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function tt(e){for(var t=1;t=r?null===n?null:0:o}function at(e){return at="function"==typeof Symbol&&"symbol"==typeof Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},at(e)}function ct(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function lt(e){for(var t=1;te.length)&&(t=e.length);for(var r=0,n=new Array(t);r=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function kt(e){var t=e.translations,r=void 0===t?{}:t,n=At(e,Pt),o=r.noResultsText,i=void 0===o?"No results for":o,a=r.suggestedQueryText,c=void 0===a?"Try searching for":a,l=r.reportMissingResultsText,u=void 0===l?"Believe this query should return results?":l,s=r.reportMissingResultsLinkText,f=void 0===s?"Let us know.":s,m=n.state.context.searchSuggestions;return 
yt.createElement("div",{className:"DocSearch-NoResults"},yt.createElement("div",{className:"DocSearch-Screen-Icon"},yt.createElement(Et,null)),yt.createElement("p",{className:"DocSearch-Title"},i,' "',yt.createElement("strong",null,n.state.query),'"'),m&&m.length>0&&yt.createElement("div",{className:"DocSearch-NoResults-Prefill-List"},yt.createElement("p",{className:"DocSearch-Help"},c,":"),yt.createElement("ul",null,m.slice(0,3).reduce((function(e,t){return[].concat(It(e),[yt.createElement("li",{key:t},yt.createElement("button",{className:"DocSearch-Prefill",key:t,type:"button",onClick:function(){n.setQuery(t.toLowerCase()+" "),n.refresh(),n.inputRef.current.focus()}},t))])}),[]))),n.getMissingResultsUrl&&yt.createElement("p",{className:"DocSearch-Help"},"".concat(u," "),yt.createElement("a",{href:n.getMissingResultsUrl({query:n.state.query}),target:"_blank",rel:"noopener noreferrer"},f)))}var xt=function(){return yt.createElement("svg",{width:"20",height:"20",viewBox:"0 0 20 20"},yt.createElement("path",{d:"M17 6v12c0 .52-.2 1-1 1H4c-.7 0-1-.33-1-1V2c0-.55.42-1 1-1h8l5 5zM14 8h-3.13c-.51 0-.87-.34-.87-.87V4",stroke:"currentColor",fill:"none",fillRule:"evenodd",strokeLinejoin:"round"}))};function Ct(e){switch(e.type){case"lvl1":return yt.createElement(xt,null);case"content":return yt.createElement(Nt,null);default:return yt.createElement(_t,null)}}function _t(){return yt.createElement("svg",{width:"20",height:"20",viewBox:"0 0 20 20"},yt.createElement("path",{d:"M13 13h4-4V8H7v5h6v4-4H7V8H3h4V3v5h6V3v5h4-4v5zm-6 0v4-4H3h4z",stroke:"currentColor",fill:"none",fillRule:"evenodd",strokeLinecap:"round",strokeLinejoin:"round"}))}function Nt(){return yt.createElement("svg",{width:"20",height:"20",viewBox:"0 0 20 20"},yt.createElement("path",{d:"M17 5H3h14zm0 5H3h14zm0 5H3h14z",stroke:"currentColor",fill:"none",fillRule:"evenodd",strokeLinejoin:"round"}))}function Tt(){return yt.createElement("svg",{className:"DocSearch-Hit-Select-Icon",width:"20",height:"20",viewBox:"0 0 
20 20"},yt.createElement("g",{stroke:"currentColor",fill:"none",fillRule:"evenodd",strokeLinecap:"round",strokeLinejoin:"round"},yt.createElement("path",{d:"M18 3v4c0 2-2 4-4 4H2"}),yt.createElement("path",{d:"M8 17l-6-6 6-6"})))}var qt=["hit","attribute","tagName"];function Rt(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function Lt(e){for(var t=1;t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function Ft(e,t){return t.split(".").reduce((function(e,t){return null!=e&&e[t]?e[t]:null}),e)}function Ut(e){var t=e.hit,r=e.attribute,n=e.tagName,o=void 0===n?"span":n,i=Ht(e,qt);return(0,yt.createElement)(o,Lt(Lt({},i),{},{dangerouslySetInnerHTML:{__html:Ft(t,"_snippetResult.".concat(r,".value"))||Ft(t,r)}}))}function Bt(e,t){return function(e){if(Array.isArray(e))return e}(e)||function(e,t){var r=null==e?null:"undefined"!=typeof Symbol&&e[Symbol.iterator]||e["@@iterator"];if(null==r)return;var n,o,i=[],a=!0,c=!1;try{for(r=r.call(e);!(a=(n=r.next()).done)&&(i.push(n.value),!t||i.length!==t);a=!0);}catch(l){c=!0,o=l}finally{try{a||null==r.return||r.return()}finally{if(c)throw o}}return i}(e,t)||function(e,t){if(!e)return;if("string"==typeof e)return Vt(e,t);var r=Object.prototype.toString.call(e).slice(8,-1);"Object"===r&&e.constructor&&(r=e.constructor.name);if("Map"===r||"Set"===r)return Array.from(e);if("Arguments"===r||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(r))return Vt(e,t)}(e,t)||function(){throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.")}()}function Vt(e,t){(null==t||t>e.length)&&(t=e.length);for(var r=0,n=new 
Array(t);r|<\/mark>)/g,Wt=RegExp(zt.source);function Qt(e){var t,r,n=e;if(!n.__docsearch_parent&&!e._highlightResult)return e.hierarchy.lvl0;var o=((n.__docsearch_parent?null===(t=n.__docsearch_parent)||void 0===t||null===(t=t._highlightResult)||void 0===t||null===(t=t.hierarchy)||void 0===t?void 0:t.lvl0:null===(r=e._highlightResult)||void 0===r||null===(r=r.hierarchy)||void 0===r?void 0:r.lvl0)||{}).value;return o&&Wt.test(o)?o.replace(zt,""):o}function Zt(){return Zt=Object.assign||function(e){for(var t=1;t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function or(e){var t=e.translations,r=void 0===t?{}:t,n=nr(e,tr),o=r.recentSearchesTitle,i=void 0===o?"Recent":o,a=r.noRecentSearchesText,c=void 0===a?"No recent searches":a,l=r.saveRecentSearchButtonTitle,u=void 0===l?"Save this search":l,s=r.removeRecentSearchButtonTitle,f=void 0===s?"Remove this search from history":s,m=r.favoriteSearchesTitle,p=void 0===m?"Favorite":m,v=r.removeFavoriteSearchButtonTitle,d=void 0===v?"Remove this search from favorites":v;return"idle"===n.state.status&&!1===n.hasCollections?n.disableUserPersonalization?null:yt.createElement("div",{className:"DocSearch-StartScreen"},yt.createElement("p",{className:"DocSearch-Help"},c)):!1===n.hasCollections?null:yt.createElement("div",{className:"DocSearch-Dropdown-Container"},yt.createElement($t,rr({},n,{title:i,collection:n.state.collections[0],renderIcon:function(){return yt.createElement("div",{className:"DocSearch-Hit-icon"},yt.createElement(Xt,null))},renderAction:function(e){var t=e.item,r=e.runFavoriteTransition,o=e.runDeleteTransition;return 
yt.createElement(yt.Fragment,null,yt.createElement("div",{className:"DocSearch-Hit-action"},yt.createElement("button",{className:"DocSearch-Hit-action-button",title:u,type:"submit",onClick:function(e){e.preventDefault(),e.stopPropagation(),r((function(){n.favoriteSearches.add(t),n.recentSearches.remove(t),n.refresh()}))}},yt.createElement(Yt,null))),yt.createElement("div",{className:"DocSearch-Hit-action"},yt.createElement("button",{className:"DocSearch-Hit-action-button",title:f,type:"submit",onClick:function(e){e.preventDefault(),e.stopPropagation(),o((function(){n.recentSearches.remove(t),n.refresh()}))}},yt.createElement(er,null))))}})),yt.createElement($t,rr({},n,{title:p,collection:n.state.collections[1],renderIcon:function(){return yt.createElement("div",{className:"DocSearch-Hit-icon"},yt.createElement(Yt,null))},renderAction:function(e){var t=e.item,r=e.runDeleteTransition;return yt.createElement("div",{className:"DocSearch-Hit-action"},yt.createElement("button",{className:"DocSearch-Hit-action-button",title:d,type:"submit",onClick:function(e){e.preventDefault(),e.stopPropagation(),r((function(){n.favoriteSearches.remove(t),n.refresh()}))}},yt.createElement(er,null)))}})))}var ir=["translations"];function ar(){return ar=Object.assign||function(e){for(var t=1;t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}var lr=yt.memo((function(e){var t=e.translations,r=void 0===t?{}:t,n=cr(e,ir);if("error"===n.state.status)return yt.createElement(wt,{translations:null==r?void 0:r.errorScreen});var o=n.state.collections.some((function(e){return e.items.length>0}));return n.state.query?!1===o?yt.createElement(kt,ar({},n,{translations:null==r?void 0:r.noResultsScreen})):yt.createElement(Gt,n):yt.createElement(or,ar({},n,{hasCollections:o,translations:null==r?void 
0:r.startScreen}))}),(function(e,t){return"loading"===t.state.status||"stalled"===t.state.status}));function ur(){return yt.createElement("svg",{viewBox:"0 0 38 38",stroke:"currentColor",strokeOpacity:".5"},yt.createElement("g",{fill:"none",fillRule:"evenodd"},yt.createElement("g",{transform:"translate(1 1)",strokeWidth:"2"},yt.createElement("circle",{strokeOpacity:".3",cx:"18",cy:"18",r:"18"}),yt.createElement("path",{d:"M36 18c0-9.94-8.06-18-18-18"},yt.createElement("animateTransform",{attributeName:"transform",type:"rotate",from:"0 18 18",to:"360 18 18",dur:"1s",repeatCount:"indefinite"})))))}var sr=r(830),fr=["translations"];function mr(){return mr=Object.assign||function(e){for(var t=1;t=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function vr(e){var t=e.translations,r=void 0===t?{}:t,n=pr(e,fr),o=r.resetButtonTitle,i=void 0===o?"Clear the query":o,a=r.resetButtonAriaLabel,c=void 0===a?"Clear the query":a,l=r.cancelButtonText,u=void 0===l?"Cancel":l,s=r.cancelButtonAriaLabel,f=void 0===s?"Cancel":s,m=n.getFormProps({inputElement:n.inputRef.current}).onReset;return 
yt.useEffect((function(){n.autoFocus&&n.inputRef.current&&n.inputRef.current.focus()}),[n.autoFocus,n.inputRef]),yt.useEffect((function(){n.isFromSelection&&n.inputRef.current&&n.inputRef.current.select()}),[n.isFromSelection,n.inputRef]),yt.createElement(yt.Fragment,null,yt.createElement("form",{className:"DocSearch-Form",onSubmit:function(e){e.preventDefault()},onReset:m},yt.createElement("label",mr({className:"DocSearch-MagnifierLabel"},n.getLabelProps()),yt.createElement(sr.W,null)),yt.createElement("div",{className:"DocSearch-LoadingIndicator"},yt.createElement(ur,null)),yt.createElement("input",mr({className:"DocSearch-Input",ref:n.inputRef},n.getInputProps({inputElement:n.inputRef.current,autoFocus:n.autoFocus,maxLength:ht}))),yt.createElement("button",{type:"reset",title:i,className:"DocSearch-Reset","aria-label":c,hidden:!n.state.query},yt.createElement(er,null))),yt.createElement("button",{className:"DocSearch-Cancel",type:"reset","aria-label":f,onClick:n.onClose},u))}var dr=["_highlightResult","_snippetResult"];function yr(e,t){if(null==e)return{};var r,n,o=function(e,t){if(null==e)return{};var r,n,o={},i=Object.keys(e);for(n=0;n=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function hr(e){return!1===function(){var e="__TEST_KEY__";try{return localStorage.setItem(e,""),localStorage.removeItem(e),!0}catch(t){return!1}}()?{setItem:function(){},getItem:function(){return[]}}:{setItem:function(t){return window.localStorage.setItem(e,JSON.stringify(t))},getItem:function(){var t=window.localStorage.getItem(e);return t?JSON.parse(t):[]}}}function br(e){var t=e.key,r=e.limit,n=void 0===r?5:r,o=hr(t),i=o.getItem().slice(0,n);return{add:function(e){var t=e,r=(t._highlightResult,t._snippetResult,yr(t,dr)),a=i.findIndex((function(e){return 
e.objectID===r.objectID}));a>-1&&i.splice(a,1),i.unshift(r),i=i.slice(0,n),o.setItem(i)},remove:function(e){i=i.filter((function(t){return t.objectID!==e.objectID})),o.setItem(i)},getAll:function(){return i}}}function gr(e){const t=`algoliasearch-client-js-${e.key}`;let r;const n=()=>(void 0===r&&(r=e.localStorage||window.localStorage),r),o=()=>JSON.parse(n().getItem(t)||"{}"),i=e=>{n().setItem(t,JSON.stringify(e))};return{get:(t,r,n={miss:()=>Promise.resolve()})=>Promise.resolve().then((()=>{(()=>{const t=e.timeToLive?1e3*e.timeToLive:null,r=o(),n=Object.fromEntries(Object.entries(r).filter((([,e])=>void 0!==e.timestamp)));if(i(n),!t)return;const a=Object.fromEntries(Object.entries(n).filter((([,e])=>{const r=(new Date).getTime();return!(e.timestamp+tPromise.all([e?e.value:r(),void 0!==e]))).then((([e,t])=>Promise.all([e,t||n.miss(e)]))).then((([e])=>e)),set:(e,r)=>Promise.resolve().then((()=>{const i=o();return i[JSON.stringify(e)]={timestamp:(new Date).getTime(),value:r},n().setItem(t,JSON.stringify(i)),r})),delete:e=>Promise.resolve().then((()=>{const r=o();delete r[JSON.stringify(e)],n().setItem(t,JSON.stringify(r))})),clear:()=>Promise.resolve().then((()=>{n().removeItem(t)}))}}function Or(e){const t=[...e.caches],r=t.shift();return void 0===r?{get:(e,t,r={miss:()=>Promise.resolve()})=>t().then((e=>Promise.all([e,r.miss(e)]))).then((([e])=>e)),set:(e,t)=>Promise.resolve(t),delete:e=>Promise.resolve(),clear:()=>Promise.resolve()}:{get:(e,n,o={miss:()=>Promise.resolve()})=>r.get(e,n,o).catch((()=>Or({caches:t}).get(e,n,o))),set:(e,n)=>r.set(e,n).catch((()=>Or({caches:t}).set(e,n))),delete:e=>r.delete(e).catch((()=>Or({caches:t}).delete(e))),clear:()=>r.clear().catch((()=>Or({caches:t}).clear()))}}function Sr(e={serializable:!0}){let t={};return{get(r,n,o={miss:()=>Promise.resolve()}){const i=JSON.stringify(r);if(i in t)return Promise.resolve(e.serializable?JSON.parse(t[i]):t[i]);const a=n(),c=o&&o.miss||(()=>Promise.resolve());return 
a.then((e=>c(e))).then((()=>a))},set:(r,n)=>(t[JSON.stringify(r)]=e.serializable?JSON.stringify(n):n,Promise.resolve(n)),delete:e=>(delete t[JSON.stringify(e)],Promise.resolve()),clear:()=>(t={},Promise.resolve())}}function jr(e){let t=e.length-1;for(;t>0;t--){const r=Math.floor(Math.random()*(t+1)),n=e[t];e[t]=e[r],e[r]=n}return e}function wr(e,t){return t?(Object.keys(t).forEach((r=>{e[r]=t[r](e)})),e):e}function Er(e,...t){let r=0;return e.replace(/%s/g,(()=>encodeURIComponent(t[r++])))}const Pr="4.20.0",Ir={WithinQueryParameters:0,WithinHeaders:1};function Dr(e,t){const r=e||{},n=r.data||{};return Object.keys(r).forEach((e=>{-1===["timeout","headers","queryParameters","data","cacheable"].indexOf(e)&&(n[e]=r[e])})),{data:Object.entries(n).length>0?n:void 0,timeout:r.timeout||t,headers:r.headers||{},queryParameters:r.queryParameters||{},cacheable:r.cacheable}}const Ar={Read:1,Write:2,Any:3},kr={Up:1,Down:2,Timeouted:3},xr=12e4;function Cr(e,t=kr.Up){return{...e,status:t,lastUpdate:Date.now()}}function _r(e){return"string"==typeof e?{protocol:"https",url:e,accept:Ar.Any}:{protocol:e.protocol||"https",url:e.url,accept:e.accept||Ar.Any}}const Nr={Delete:"DELETE",Get:"GET",Post:"POST",Put:"PUT"};function Tr(e,t){return Promise.all(t.map((t=>e.get(t,(()=>Promise.resolve(Cr(t))))))).then((e=>{const r=e.filter((e=>function(e){return e.status===kr.Up||Date.now()-e.lastUpdate>xr}(e))),n=e.filter((e=>function(e){return e.status===kr.Timeouted&&Date.now()-e.lastUpdate<=xr}(e))),o=[...r,...n];return{getTimeout:(e,t)=>(0===n.length&&0===e?1:n.length+3+e)*t,statelessHosts:o.length>0?o.map((e=>_r(e))):t}}))}const qr=(e,t)=>(e=>{const t=e.status;return e.isTimedOut||(({isTimedOut:e,status:t})=>!e&&0==~~t)(e)||2!=~~(t/100)&&4!=~~(t/100)})(e)?t.onRetry(e):(({status:e})=>2==~~(e/100))(e)?t.onSuccess(e):t.onFail(e);function Rr(e,t,r,n){const o=[],i=function(e,t){if(e.method===Nr.Get||void 0===e.data&&void 0===t.data)return;const 
r=Array.isArray(e.data)?e.data:{...e.data,...t.data};return JSON.stringify(r)}(r,n),a=function(e,t){const r={...e.headers,...t.headers},n={};return Object.keys(r).forEach((e=>{const t=r[e];n[e.toLowerCase()]=t})),n}(e,n),c=r.method,l=r.method!==Nr.Get?{}:{...r.data,...n.data},u={"x-algolia-agent":e.userAgent.value,...e.queryParameters,...l,...n.queryParameters};let s=0;const f=(t,l)=>{const m=t.pop();if(void 0===m)throw{name:"RetryError",message:"Unreachable hosts - your application id may be incorrect. If the error persists, contact support@algolia.com.",transporterStackTrace:Fr(o)};const p={data:i,headers:a,method:c,url:Mr(m,r.path,u),connectTimeout:l(s,e.timeouts.connect),responseTimeout:l(s,n.timeout)},v=e=>{const r={request:p,response:e,host:m,triesLeft:t.length};return o.push(r),r},d={onSuccess:e=>function(e){try{return JSON.parse(e.content)}catch(t){throw function(e,t){return{name:"DeserializationError",message:e,response:t}}(t.message,e)}}(e),onRetry(r){const n=v(r);return r.isTimedOut&&s++,Promise.all([e.logger.info("Retryable failure",Ur(n)),e.hostsCache.set(m,Cr(m,r.isTimedOut?kr.Timeouted:kr.Down))]).then((()=>f(t,l)))},onFail(e){throw v(e),function({content:e,status:t},r){let n=e;try{n=JSON.parse(e).message}catch(o){}return function(e,t,r){return{name:"ApiError",message:e,status:t,transporterStackTrace:r}}(n,t,r)}(e,Fr(o))}};return e.requester.send(p).then((e=>qr(e,d)))};return Tr(e.hostsCache,t).then((e=>f([...e.statelessHosts].reverse(),e.getTimeout)))}function Lr(e){const t={value:`Algolia for JavaScript (${e})`,add(e){const r=`; ${e.segment}${void 0!==e.version?` (${e.version})`:""}`;return-1===t.value.indexOf(r)&&(t.value=`${t.value}${r}`),t}};return t}function Mr(e,t,r){const n=Hr(r);let o=`${e.protocol}://${e.url}/${"/"===t.charAt(0)?t.substr(1):t}`;return n.length&&(o+=`?${n}`),o}function Hr(e){return Object.keys(e).map((t=>{return Er("%s=%s",t,(r=e[t],"[object Object]"===Object.prototype.toString.call(r)||"[object 
Array]"===Object.prototype.toString.call(r)?JSON.stringify(e[t]):e[t]));var r})).join("&")}function Fr(e){return e.map((e=>Ur(e)))}function Ur(e){const t=e.request.headers["x-algolia-api-key"]?{"x-algolia-api-key":"*****"}:{};return{...e,request:{...e.request,headers:{...e.request.headers,...t}}}}const Br=e=>{const t=e.appId,r=function(e,t,r){const n={"x-algolia-api-key":r,"x-algolia-application-id":t};return{headers:()=>e===Ir.WithinHeaders?n:{},queryParameters:()=>e===Ir.WithinQueryParameters?n:{}}}(void 0!==e.authMode?e.authMode:Ir.WithinHeaders,t,e.apiKey),n=function(e){const{hostsCache:t,logger:r,requester:n,requestsCache:o,responsesCache:i,timeouts:a,userAgent:c,hosts:l,queryParameters:u,headers:s}=e,f={hostsCache:t,logger:r,requester:n,requestsCache:o,responsesCache:i,timeouts:a,userAgent:c,headers:s,queryParameters:u,hosts:l.map((e=>_r(e))),read(e,t){const r=Dr(t,f.timeouts.read),n=()=>Rr(f,f.hosts.filter((e=>0!=(e.accept&Ar.Read))),e,r);if(!0!==(void 0!==r.cacheable?r.cacheable:e.cacheable))return n();const o={request:e,mappedRequestOptions:r,transporter:{queryParameters:f.queryParameters,headers:f.headers}};return f.responsesCache.get(o,(()=>f.requestsCache.get(o,(()=>f.requestsCache.set(o,n()).then((e=>Promise.all([f.requestsCache.delete(o),e])),(e=>Promise.all([f.requestsCache.delete(o),Promise.reject(e)]))).then((([e,t])=>t))))),{miss:e=>f.responsesCache.set(o,e)})},write:(e,t)=>Rr(f,f.hosts.filter((e=>0!=(e.accept&Ar.Write))),e,Dr(t,f.timeouts.write))};return 
f}({hosts:[{url:`${t}-dsn.algolia.net`,accept:Ar.Read},{url:`${t}.algolia.net`,accept:Ar.Write}].concat(jr([{url:`${t}-1.algolianet.com`},{url:`${t}-2.algolianet.com`},{url:`${t}-3.algolianet.com`}])),...e,headers:{...r.headers(),"content-type":"application/x-www-form-urlencoded",...e.headers},queryParameters:{...r.queryParameters(),...e.queryParameters}}),o={transporter:n,appId:t,addAlgoliaAgent(e,t){n.userAgent.add({segment:e,version:t})},clearCache:()=>Promise.all([n.requestsCache.clear(),n.responsesCache.clear()]).then((()=>{}))};return wr(o,e.methods)},Vr=e=>(t,r)=>t.method===Nr.Get?e.transporter.read(t,r):e.transporter.write(t,r),Kr=e=>(t,r={})=>wr({transporter:e.transporter,appId:e.appId,indexName:t},r.methods),$r=e=>(t,r)=>{const n=t.map((e=>({...e,params:Hr(e.params||{})})));return e.transporter.read({method:Nr.Post,path:"1/indexes/*/queries",data:{requests:n},cacheable:!0},r)},Jr=e=>(t,r)=>Promise.all(t.map((t=>{const{facetName:n,facetQuery:o,...i}=t.params;return Kr(e)(t.indexName,{methods:{searchForFacetValues:Qr}}).searchForFacetValues(n,o,{...r,...i})}))),zr=e=>(t,r,n)=>e.transporter.read({method:Nr.Post,path:Er("1/answers/%s/prediction",e.indexName),data:{query:t,queryLanguages:r},cacheable:!0},n),Wr=e=>(t,r)=>e.transporter.read({method:Nr.Post,path:Er("1/indexes/%s/query",e.indexName),data:{query:t},cacheable:!0},r),Qr=e=>(t,r,n)=>e.transporter.read({method:Nr.Post,path:Er("1/indexes/%s/facets/%s/query",e.indexName,t),data:{facetQuery:r},cacheable:!0},n),Zr={Debug:1,Info:2,Error:3};function Gr(e,t,r){const n={appId:e,apiKey:t,timeouts:{connect:1,read:2,write:30},requester:{send:e=>new Promise((t=>{const r=new XMLHttpRequest;r.open(e.method,e.url,!0),Object.keys(e.headers).forEach((t=>r.setRequestHeader(t,e.headers[t])));const n=(e,n)=>setTimeout((()=>{r.abort(),t({status:0,content:n,isTimedOut:!0})}),1e3*e),o=n(e.connectTimeout,"Connection timeout");let i;r.onreadystatechange=()=>{r.readyState>r.OPENED&&void 
0===i&&(clearTimeout(o),i=n(e.responseTimeout,"Socket timeout"))},r.onerror=()=>{0===r.status&&(clearTimeout(o),clearTimeout(i),t({content:r.responseText||"Network request failed",status:r.status,isTimedOut:!1}))},r.onload=()=>{clearTimeout(o),clearTimeout(i),t({content:r.responseText,status:r.status,isTimedOut:!1})},r.send(e.data)}))},logger:(o=Zr.Error,{debug:(e,t)=>(Zr.Debug>=o&&console.debug(e,t),Promise.resolve()),info:(e,t)=>(Zr.Info>=o&&console.info(e,t),Promise.resolve()),error:(e,t)=>(console.error(e,t),Promise.resolve())}),responsesCache:Sr(),requestsCache:Sr({serializable:!1}),hostsCache:Or({caches:[gr({key:`${Pr}-${e}`}),Sr()]}),userAgent:Lr(Pr).add({segment:"Browser",version:"lite"}),authMode:Ir.WithinQueryParameters};var o;return Br({...n,...r,methods:{search:$r,searchForFacetValues:Jr,multipleQueries:$r,multipleSearchForFacetValues:Jr,customRequest:Vr,initIndex:e=>t=>Kr(e)(t,{methods:{search:Wr,searchForFacetValues:Qr,findAnswers:zr}})}})}Gr.version=Pr;const Xr=Gr;var Yr="3.5.2";function en(){}function tn(e){return e}function rn(e){return 1===e.button||e.altKey||e.ctrlKey||e.metaKey||e.shiftKey}function nn(e,t,r){return e.reduce((function(e,n){var o=t(n);return e.hasOwnProperty(o)||(e[o]=[]),e[o].length<(r||5)&&e[o].push(n),e}),{})}var on=["footer","searchBox"];function an(){return an=Object.assign||function(e){for(var t=1;te.length)&&(t=e.length);for(var r=0,n=new Array(t);r=0||(o[r]=e[r]);return o}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(o[r]=e[r])}return o}function pn(e){var t=e.appId,r=e.apiKey,n=e.indexName,o=e.placeholder,i=void 0===o?"Search docs":o,a=e.searchParameters,c=e.maxResultsPerGroup,l=e.onClose,u=void 0===l?en:l,s=e.transformItems,f=void 0===s?tn:s,m=e.hitComponent,p=void 0===m?St:m,v=e.resultsFooterComponent,d=void 0===v?function(){return null}:v,y=e.navigator,h=e.initialScrollY,b=void 0===h?0:h,g=e.transformSearchClient,O=void 
0===g?tn:g,S=e.disableUserPersonalization,j=void 0!==S&&S,w=e.initialQuery,E=void 0===w?"":w,P=e.translations,I=void 0===P?{}:P,D=e.getMissingResultsUrl,A=e.insights,k=void 0!==A&&A,x=I.footer,C=I.searchBox,_=mn(I,on),N=sn(yt.useState({query:"",collections:[],completion:null,context:{},isOpen:!1,activeItemId:null,status:"idle"}),2),T=N[0],q=N[1],R=yt.useRef(null),L=yt.useRef(null),M=yt.useRef(null),H=yt.useRef(null),F=yt.useRef(null),U=yt.useRef(10),B=yt.useRef("undefined"!=typeof window?window.getSelection().toString().slice(0,ht):"").current,V=yt.useRef(E||B).current,K=function(e,t,r){return yt.useMemo((function(){var n=Xr(e,t);return n.addAlgoliaAgent("docsearch",Yr),!1===/docsearch.js \(.*\)/.test(n.transporter.userAgent.value)&&n.addAlgoliaAgent("docsearch-react",Yr),r(n)}),[e,t,r])}(t,r,O),$=yt.useRef(br({key:"__DOCSEARCH_FAVORITE_SEARCHES__".concat(n),limit:10})).current,J=yt.useRef(br({key:"__DOCSEARCH_RECENT_SEARCHES__".concat(n),limit:0===$.getAll().length?7:4})).current,z=yt.useCallback((function(e){if(!j){var t="content"===e.type?e.__docsearch_parent:e;t&&-1===$.getAll().findIndex((function(e){return e.objectID===t.objectID}))&&J.add(t)}}),[$,J,j]),W=yt.useCallback((function(e){if(T.context.algoliaInsightsPlugin&&e.__autocomplete_id){var t=e,r={eventName:"Item Selected",index:t.__autocomplete_indexName,items:[t],positions:[e.__autocomplete_id],queryID:t.__autocomplete_queryID};T.context.algoliaInsightsPlugin.insights.clickedObjectIDsAfterSearch(r)}}),[T.context.algoliaInsightsPlugin]),Q=yt.useMemo((function(){return dt({id:"docsearch",defaultActiveItemId:0,placeholder:i,openOnFocus:!0,initialState:{query:V,context:{searchSuggestions:[]}},insights:k,navigator:y,onStateChange:function(e){q(e.state)},getSources:function(e){var o=e.query,i=e.state,l=e.setContext,s=e.setStatus;if(!o)return j?[]:[{sourceId:"recentSearches",onSelect:function(e){var t=e.item,r=e.event;z(t),rn(r)||u()},getItemUrl:function(e){return e.item.url},getItems:function(){return 
J.getAll()}},{sourceId:"favoriteSearches",onSelect:function(e){var t=e.item,r=e.event;z(t),rn(r)||u()},getItemUrl:function(e){return e.item.url},getItems:function(){return $.getAll()}}];var m=Boolean(k);return K.search([{query:o,indexName:n,params:ln({attributesToRetrieve:["hierarchy.lvl0","hierarchy.lvl1","hierarchy.lvl2","hierarchy.lvl3","hierarchy.lvl4","hierarchy.lvl5","hierarchy.lvl6","content","type","url"],attributesToSnippet:["hierarchy.lvl1:".concat(U.current),"hierarchy.lvl2:".concat(U.current),"hierarchy.lvl3:".concat(U.current),"hierarchy.lvl4:".concat(U.current),"hierarchy.lvl5:".concat(U.current),"hierarchy.lvl6:".concat(U.current),"content:".concat(U.current)],snippetEllipsisText:"\u2026",highlightPreTag:"",highlightPostTag:"",hitsPerPage:20,clickAnalytics:m},a)}]).catch((function(e){throw"RetryError"===e.name&&s("error"),e})).then((function(e){var o=e.results[0],a=o.hits,s=o.nbHits,p=nn(a,(function(e){return Qt(e)}),c);i.context.searchSuggestions.length0&&(X(),F.current&&F.current.focus())}),[V,X]),yt.useEffect((function(){function e(){if(L.current){var e=.01*window.innerHeight;L.current.style.setProperty("--docsearch-vh","".concat(e,"px"))}}return e(),window.addEventListener("resize",e),function(){window.removeEventListener("resize",e)}}),[]),yt.createElement("div",an({ref:R},G({"aria-expanded":!0}),{className:["DocSearch","DocSearch-Container","stalled"===T.status&&"DocSearch-Container--Stalled","error"===T.status&&"DocSearch-Container--Errored"].filter(Boolean).join(" 
"),role:"button",tabIndex:0,onMouseDown:function(e){e.target===e.currentTarget&&u()}}),yt.createElement("div",{className:"DocSearch-Modal",ref:L},yt.createElement("header",{className:"DocSearch-SearchBar",ref:M},yt.createElement(vr,an({},Q,{state:T,autoFocus:0===V.length,inputRef:F,isFromSelection:Boolean(V)&&V===B,translations:C,onClose:u}))),yt.createElement("div",{className:"DocSearch-Dropdown",ref:H},yt.createElement(lr,an({},Q,{indexName:n,state:T,hitComponent:p,resultsFooterComponent:d,disableUserPersonalization:j,recentSearches:J,favoriteSearches:$,inputRef:F,translations:_,getMissingResultsUrl:D,onItemClick:function(e,t){W(e),z(e),rn(t)||u()}}))),yt.createElement("footer",{className:"DocSearch-Footer"},yt.createElement(Ot,{translations:x}))))}}}]); \ No newline at end of file diff --git a/assets/js/14421916.f0705482.js b/assets/js/14421916.f0705482.js deleted file mode 100644 index 487636f3..00000000 --- a/assets/js/14421916.f0705482.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[635],{3905:(e,n,t)=>{t.d(n,{Zo:()=>u,kt:()=>h});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function r(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):r(r({},n),e)),t},u=function(e){var n=c(e.components);return a.createElement(s.Provider,{value:n},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var 
n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),p=c(t),m=i,h=p["".concat(s,".").concat(m)]||p[m]||d[m]||o;return t?a.createElement(h,r(r({ref:n},u),{},{components:t})):a.createElement(h,r({ref:n},u))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var o=t.length,r=new Array(o);r[0]=m;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[p]="string"==typeof e?e:i,r[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>r,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=t(87462),i=(t(67294),t(3905));const o={title:"Annotating COVID-19"},r=void 0,l={unversionedId:"introduction/covid19",id:"version-3.14/introduction/covid19",title:"Annotating COVID-19",description:"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.",source:"@site/versioned_docs/version-3.14/introduction/covid19.md",sourceDirName:"introduction",slug:"/introduction/covid19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/covid19",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/introduction/covid19.md",tags:[],version:"3.14",frontMatter:{title:"Annotating COVID-19"},sidebar:"version-3.14/docs",previous:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/getting-started"},next:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/1000Genomes"}},s=[{value:"Getting Nirvana",id:"getting-nirvana",children:[],level:2},{value:"Downloading the COVID-19 data files",id:"downloading-the-covid-19-data-files",children:[],level:2},{value:"Download a COVID-19 VCF 
file",id:"download-a-covid-19-vcf-file",children:[],level:2},{value:"Running Nirvana",id:"running-nirvana",children:[],level:2},{value:"Investigating the Results",id:"investigating-the-results",children:[],level:2}],c={toc:s},u="wrapper";function p(e){let{components:n,...t}=e;return(0,i.kt)(u,(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health."),(0,i.kt)("p",null,"However, nothing in our architecture prevents us from supporting other genomes. Earlier this year, we had an opportunity to put that statement to the test - we added support for annotating the ",(0,i.kt)("strong",{parentName:"p"},"SARS-CoV-2")," genome, the virus that causes the ",(0,i.kt)("strong",{parentName:"p"},"COVID-19")," disease."),(0,i.kt)("p",null,"In addition to normal transcript annotation, we also supply:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"allele frequencies"),(0,i.kt)("li",{parentName:"ul"},"protein domains")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"SARS-CoV-2 Galaxy 
Project")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The allele frequencies used by Nirvana were provided by the ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/galaxyproject/SARS-CoV-2"},"SARS-CoV-2 Galaxy Project"),". This is an international effort that provides ongoing analysis of COVID-19 using Galaxy, BioConda, and public research infrastructures."))),(0,i.kt)("h2",{id:"getting-nirvana"},"Getting Nirvana"),(0,i.kt)("p",null,"If you don't have Nirvana already, please consult our ",(0,i.kt)("a",{parentName:"p",href:"getting-started"},"Getting Started")," page first."),(0,i.kt)("h2",{id:"downloading-the-covid-19-data-files"},"Downloading the COVID-19 data files"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip"},"a data zip file")," containing new gene models, reference, and external data sources for SARS-CoV-2:"),(0,i.kt)("p",null,"Just go to the directory that contains your Nirvana ",(0,i.kt)("inlineCode",{parentName:"p"},"Data")," directory."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"cd ~/Nirvana\ncurl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip\nunzip Covid19Data.zip\n")),(0,i.kt)("h2",{id:"download-a-covid-19-vcf-file"},"Download a COVID-19 VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz"},"a COVID-19 VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz\n")),(0,i.kt)("h2",{id:"running-nirvana"},"Running Nirvana"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your 
VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/Nirvana.dll \\\n -c Data/Cache/SARS-CoV-2/SARS-CoV-2 \\\n --sd Data/SupplementaryAnnotation/SARS-CoV-2 \\\n -r Data/References/SARS-CoV-2.ASM985889v3.dat \\\n -i Covid19Mutations.vcf.gz \\\n -o Covid19Mutations\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache prefix"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:00.0\nSA Position Scan 00:00:00.0 1763\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nNC_045512 00:00:00.0 00:00:00.1 173\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:00.0 2.0 %\nPreload 00:00:00.0 0.3 %\nAnnotation 00:00:00.1 6.0 %\n\nTime: 
00:00:01.5\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"Covid19Mutations.json.gz"),". Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.json.gz"},"the full JSON file"),"."),(0,i.kt)("h2",{id:"investigating-the-results"},"Investigating the Results"),(0,i.kt)("p",null,"Here's an example of what a COVID-19 variant looks like in the JSON output:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "chromosome":"NC_045512.2",\n "position":27323,\n "refAllele":"C",\n "altAlleles":[\n "T"\n ],\n "filters":[\n "PASS"\n ],\n "proteinDomains":[\n {\n "start":27202,\n "end":27384,\n "proteinId":"YP_009724394.1",\n "domainId":"cl13556",\n "domainName":"Sars6 super family",\n "reciprocalOverlap":0.00546,\n "annotationOverlap":0.00546\n }\n ],\n "variants":[\n {\n "vid":"NC_045512.2-27323-C-T",\n "chromosome":"NC_045512.2",\n "begin":27323,\n "end":27323,\n "refAllele":"C",\n "altAllele":"T",\n "variantType":"SNV",\n "hgvsg":"NC_045512.2:g.27323C>T",\n "alleleFrequency":{\n "refAllele":"C",\n "altAllele":"T",\n "allAc":8,\n "allAn":1058,\n "allAf":0.007561\n },\n "transcripts":[\n {\n "transcript":"YP_009724394.1",\n "source":"RefSeq",\n "bioType":"protein_coding",\n "codons":"tCt/tTt",\n "aminoAcids":"S/F",\n "cdnaPos":"122",\n "cdsPos":"122",\n "exons":"1/1",\n "proteinPos":"41",\n "geneId":"43740572",\n "hgnc":"ORF6",\n "consequence":[\n "missense_variant"\n ],\n "hgvsc":"YP_009724394.1:c.122C>T",\n "hgvsp":"YP_009724394.1:p.(Ser41Phe)",\n "proteinId":"YP_009724394.1"\n },\n {\n "transcript":"YP_009724395.1",\n "source":"RefSeq",\n "bioType":"protein_coding",\n "geneId":"43740573",\n "hgnc":"ORF7a",\n "consequence":[\n "upstream_gene_variant"\n ],\n "proteinId":"YP_009724395.1"\n }\n ]\n }\n ]\n}\n')))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/15c5c522.168a75c3.js 
b/assets/js/15c5c522.168a75c3.js deleted file mode 100644 index 0531893a..00000000 --- a/assets/js/15c5c522.168a75c3.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[692],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var s=a.createContext({}),p=function(t){var e=a.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=p(t.components);return a.createElement(s.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,s=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),c=p(n),u=r,f=c["".concat(s,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(f,o(o({ref:e},m),{},{components:n})):a.createElement(f,o({ref:e},m))}));function f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=u;var i={};for(var s in e)hasOwnProperty.call(e,s)&&(i[s]=e[s]);i.originalType=t,i[c]="string"==typeof t?t:r,o[1]=i;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>l,metadata:()=>i,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 
0,i={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.14/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.14",frontMatter:{}},s=[],p={toc:s},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/16f10573.f5af563d.js b/assets/js/16f10573.f5af563d.js deleted file mode 100644 index 3c920a52..00000000 --- a/assets/js/16f10573.f5af563d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1959,1063],{3905:(t,e,a)=>{a.d(e,{Zo:()=>d,kt:()=>g});var n=a(67294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),m=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},d=function(t){var e=m(t.components);return 
n.createElement(p.Provider,{value:e},t.children)},s="mdxType",c={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,d=o(t,["components","mdxType","originalType","parentName"]),s=m(a),N=r,g=s["".concat(p,".").concat(N)]||s[N]||c[N]||l;return a?n.createElement(g,i(i({ref:e},d),{},{components:a})):n.createElement(g,i({ref:e},d))}));function g(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=N;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[s]="string"==typeof t?t:r,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/fusioncatcher-json",id:"version-3.17/data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/fusioncatcher-json.md",tags:[],version:"3.17",frontMatter:{}},p=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 
Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA oesophageal carcinomas"\n ]\n }\n ]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known germline data sources")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data sources")))),(0,r.kt)("h4",{id:"genes"},"genes"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"first"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"second"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"3' 
gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are paralogs for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)")))),(0,r.kt)("h4",{id:"gene"},"gene"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. 
MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}s.isMDXComponent=!0},61842:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>i,metadata:()=>p,toc:()=>m});var n=a(87462),r=(a(67294),a(3905)),l=a(56015);const i={title:"FusionCatcher"},o=void 0,p={unversionedId:"data-sources/fusioncatcher",id:"version-3.17/data-sources/fusioncatcher",title:"FusionCatcher",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/fusioncatcher.mdx",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/fusioncatcher",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/fusioncatcher.mdx",tags:[],version:"3.17",frontMatter:{title:"FusionCatcher"},sidebar:"version-3.17/docs",previous:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/dbsnp"},next:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/gnomad"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Supported Data Sources",id:"supported-data-sources",children:[{value:"Oncogenes",id:"oncogenes",children:[],level:3},{value:"Germline",id:"germline",children:[],level:3},{value:"Somatic",id:"somatic",children:[],level:3}],level:2},{value:"Gene Pair TSV File",id:"gene-pair-tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Gene TSV File",id:"gene-tsv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download 
URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:m},s="wrapper";function c(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://github.com/ndaniel/fusioncatcher"},"FusionCatcher")," is a well-known tool that searches for somatic novel/known fusion genes, translocations, and/or chimeras in RNA-seq data. While FusionCatcher itself is not part of Nirvana, we have included a subset of their genomic databases in Nirvana."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Daniel Nicorici, Mihaela \u015eatalan, Henrik Edgren, Sara Kangaspeska, Astrid Murum\xe4gi, Olli Kallioniemi, Sami Virtanen, Olavi Kilkku. (2014) ",(0,r.kt)("a",{parentName:"p",href:"https://www.biorxiv.org/content/10.1101/011650v1"},"FusionCatcher \u2013 a tool for finding somatic fusion genes in paired-end RNA-sequencing data"),". 
",(0,r.kt)("em",{parentName:"p"},"bioRxiv")," 011650"))),(0,r.kt)("h2",{id:"supported-data-sources"},"Supported Data Sources"),(0,r.kt)("h3",{id:"oncogenes"},"Oncogenes"),(0,r.kt)("p",null,"The following data sources are aggregated and used to populate the ",(0,r.kt)("inlineCode",{parentName:"p"},"isOncogene")," field in the gene JSON object:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bushman"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.bushmanlab.org/links/genelists"},"bushmanlab.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cancer_genes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ONGENE"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S1673852716302053"},"JGG")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://ongene.bioinfo-minzhao.org"},"bioinfo-minzhao.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"oncogenes_more.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"UniProt tumor 
genes"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/49/D1/D480/6006196"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.uniprot.org/downloads"},"uniprot.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tumor_genes.txt")))),(0,r.kt)("h3",{id:"germline"},"Germline"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Nirvana label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"1000 Genomes Project"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0104567"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"1000genomes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy (strong support)"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"banned.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Illumina Body Map 
2.0"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-513"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"bodymap2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CACG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S0888754312000821"},"Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"cacg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ConjoinG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0013284"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"conjoing.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy prefrontal cortex"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcmedgenomics.biomedcentral.com/articles/10.1186/s12920-016-0164-y"},"BMC Medical Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE68719"},"NCBI GEO")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cortex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Duplicated Genes Database"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0050653"},"PLOS 
ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://dgd.genouest.org/"},"genouest.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"dgd.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"GTEx healthy tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://gtexportal.org/home/"},"gtexportal.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"gtex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"healthy.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Human Protein Atlas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.mcponline.org/article/S1535-9476(20)34633-8/fulltext"},"MCP")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-1733/"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"hpa.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Babiceanu non-cancer tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-cancer_tissues.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor cell 
lines"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor_cells.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions normal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-normal.txt")))),(0,r.kt)("h3",{id:"somatic"},"Somatic"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Nirvana label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Alaei-Mahabadi 18 cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.pnas.org/content/113/48/13768.long"},"PNAS")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"18cancers.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"DepMap CCLE"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://depmap.org/portal/download/"},"depmap.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE Klijn"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080"},"Nature 
Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080#Sec27"},"Nature Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Cancer Genome Project"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cgp.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerKB 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4kb.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerPub 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4pub.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerSeq 
4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4seq.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"COSMIC"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cosmic.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bao gliomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genome.cshlp.org/content/24/11/1765"},"Genome Research")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"gliomas.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Known"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"known.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Mitelman DB"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://mitelmandatabase.isb-cgc.org"},"ISB-CGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://storage.cloud.google.com/mitelman-data-files/prod/mitelman_db.zip"},"Google Cloud")),(0,r.kt)("td",{parentName:"tr",align:"left"},"mitelman.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA oesophageal 
carcinomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature20805"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"oesophagus.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bailey pancreatic cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965#Sec44"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pancreases.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"PCAWG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2018.03.042"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://dcc.icgc.org/releases/PCAWG/transcriptome/fusion"},"ICGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pcawg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Robinson prostate 
cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2015.05.001"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell/fulltext/S0092-8674(15)00548-6?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0092867415005486%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"prostate_cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cancer.gov/about-nci/organization/ccg/research/structural-genomics/tcga"},"cancer.gov")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions tumor"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA Gao"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.celrep.2018.03.050"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell-reports/fulltext/S2211-1247(18)30395-4?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS2211124718303954%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA 
Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TICdb"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-8-33"},"BMC Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genetica.unav.edu/TICdb/allseqs_TICdb.txt"},"unav.edu")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ticdb.txt")))),(0,r.kt)("h2",{id:"gene-pair-tsv-file"},"Gene Pair TSV File"),(0,r.kt)("p",null,"Most of the data files in FusionCatcher are two-column TSV files containing the Ensembl gene IDs that are paired together."),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the 1000genomes.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000006210 ENSG00000102962\nENSG00000006652 ENSG00000181016\nENSG00000014138 ENSG00000149798\nENSG00000026297 ENSG00000071242\nENSG00000035499 ENSG00000155959\nENSG00000055211 ENSG00000131013\nENSG00000055332 ENSG00000179915\nENSG00000062485 ENSG00000257727\nENSG00000065978 ENSG00000166501\nENSG00000066044 ENSG00000104980\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"In Nirvana, we will only import a gene pair if both Ensembl gene IDs are recognized from either our GRCh37 or GRCh38 cache files."),(0,r.kt)("h2",{id:"gene-tsv-file"},"Gene TSV File"),(0,r.kt)("p",null,"Some of the data files are single-column files containing Ensembl gene IDs. 
This is commonly used in the data files representing oncogene data sources."),(0,r.kt)("h3",{id:"example-1"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the oncogenes_more.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000000938\nENSG00000003402\nENSG00000005469\nENSG00000005884\nENSG00000006128\nENSG00000006453\nENSG00000006468\nENSG00000007350\nENSG00000008294\nENSG00000008952\n")),(0,r.kt)("h3",{id:"parsing-1"},"Parsing"),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"FusionCatcher also uses creates custom Ensembl genes (e.g. ",(0,r.kt)("inlineCode",{parentName:"p"},"ENSG09000000002"),") to handle missing Ensembl genes. 
Nirvana will ignore these entries since we only include the gene IDs that are currently recognized by Nirvana."),(0,r.kt)("p",{parentName:"div"},"I suspect that these were originally RefSeq genes and if so, we can support those directly in Nirvana in the future."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sourceforge.net/projects/fusioncatcher/files/data"},"https://sourceforge.net/projects/fusioncatcher/files/data")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSON"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/1708e687.9d113652.js b/assets/js/1708e687.9d113652.js deleted file mode 100644 index 690f6771..00000000 --- a/assets/js/1708e687.9d113652.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[140],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>m});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var 
n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),d=c(n),g=i,m=d["".concat(s,".").concat(g)]||d[g]||u[g]||r;return n?a.createElement(m,l(l({ref:t},p),{},{components:n})):a.createElement(m,l({ref:t},p))}));function m(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=g;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[d]="string"==typeof e?e:i,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.21/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clingen-dosage-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],c={toc:s},p="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage 
sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/1755a1d1.67318233.js b/assets/js/1755a1d1.67318233.js deleted file mode 100644 index 5da0db77..00000000 --- a/assets/js/1755a1d1.67318233.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[319,3389],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>h});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var 
t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},d=function(e){var t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),c=m(n),u=i,h=c["".concat(s,".").concat(u)]||c[u]||p[u]||r;return n?a.createElement(h,o(o({ref:t},d),{},{components:n})):a.createElement(h,o({ref:t},d))}));function h(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:i,o[1]=l;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>c,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},o=void 0,l={unversionedId:"data-sources/cosmic-json",id:"version-3.16/data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/cosmic-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],m={toc:s},d="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},' 
"cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary gland (submandibular)",\n "numSamples":1\n },\n {\n "name":"salivary gland (parotid)",\n "numSamples":1\n },\n {\n "name":"salivary gland (nasal cavity)",\n "numSamples":1\n },\n {\n "name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 19841262\n ]\n }\n ]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypic 
descriptions")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Count")),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"name"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"description")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})))))}c.isMDXComponent=!0},20150:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>o,metadata:()=>s,toc:()=>m});var a=n(87462),i=(n(67294),n(3905)),r=n(60299);const o={title:"COSMIC"},l=void 
0,s={unversionedId:"data-sources/cosmic",id:"version-3.16/data-sources/cosmic",title:"COSMIC",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/cosmic.mdx",sourceDirName:"data-sources",slug:"/data-sources/cosmic",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/cosmic",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/cosmic.mdx",tags:[],version:"3.16",frontMatter:{title:"COSMIC"},sidebar:"version-3.16/docs",previous:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clinvar"},next:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/dbsnp"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Gene Fusions",id:"gene-fusions",children:[{value:"TSV File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4},{value:"Aggregation",id:"aggregation",children:[],level:4},{value:"Fixing the HGVS RNA Notation",id:"fixing-the-hgvs-rna-notation",children:[],level:4},{value:"Aggregating Histologies",id:"aggregating-histologies",children:[],level:4},{value:"Aggregating Sites",id:"aggregating-sites",children:[],level:4}],level:3},{value:"Known Issues",id:"known-issues",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2}],d={toc:m},c="wrapper";function p(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"COSMIC, the Catalogue of Somatic Mutations in Cancer, is the world's largest source of expert manually curated somatic mutation information relating to human cancers."),(0,i.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"John G Tate, Sally Bamford, Harry C Jubb, Zbyslaw Sondka, David M Beare, Nidhi Bindal, Harry Boutselakis, Charlotte G Cole, Celestino Creatore, Elisabeth Dawson, Peter Fish, Bhavana Harsha, Charlie Hathaway, Steve C Jupe, Chai Yin Kok, Kate Noble, Laura Ponting, Christopher C Ramshaw, Claire E Rye, Helen E Speedy, Ray Stefancsik, Sam L Thompson, Shicai Wang, Sari Ward, Peter J Campbell, Simon A Forbes. 
(2019) ",(0,i.kt)("a",{parentName:"p",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"COSMIC: the Catalogue Of Somatic Mutations In Cancer"),", ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", Volume 47, Issue D1"))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Licensed Content")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Commercial companies are required to ",(0,i.kt)("a",{parentName:"p",href:"https://cancer.sanger.ac.uk/cosmic/license"},"acquire a license from COSMIC"),". At the moment, this means that our COSMIC content is only available in Illumina's products and services, not in the open source distribution."),(0,i.kt)("p",{parentName:"div"},"Since many of you are academic users, we will enable a COSMIC login in our downloader later this year that will allow academic and commercial organizations (with a license) access our COSMIC data sources. "))),(0,i.kt)("h2",{id:"gene-fusions"},"Gene Fusions"),(0,i.kt)("p",null,"Gene fusions are manually curated from peer reviewed publications by expert COSMIC curators. A comprehensive literature curation is completed for each fusion pair when it is released in the database. 
Currently COSMIC includes information on fusions involved in solid tumours and leukaemias."),(0,i.kt)("h3",{id:"tsv-file"},"TSV File"),(0,i.kt)("h4",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"SAMPLE_ID SAMPLE_NAME PRIMARY_SITE SITE_SUBTYPE_1 SITE_SUBTYPE_2 SITE_SUBTYPE_3 PRIMARY_HISTOLOGY HISTOLOGY_SUBTYPE_1 HISTOLOGY_SUBTYPE_2 HISTOLOGY_SUBTYPE_3 FUSION_ID TRANSLOCATION_NAME 5'_CHROMOSOME 5'_STRAND 5'_GENE_ID 5'_GENE_NAME 5'_LAST_OBSERVED_EXON 5'_GENOME_START_FROM 5'_GENOME_START_TO 5'_GENOME_STOP_FROM 5'_GENOME_STOP_TO 3'_CHROMOSOME 3'_STRAND 3'_GENE_ID 3'_GENE_NAME 3'_FIRST_OBSERVED_EXON 3'_GENOME_START_FROM 3'_GENOME_START_TO 3'_GENOME_STOP_FROM 3'_GENOME_STOP_TO FUSION_TYPE PUBMED_PMID\n749711 HCC1187 breast NS NS NS carcinoma ductal_carcinoma NS NS 665 ENST00000360863.10(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452 8 - 197199 RGS22 22 99981937 99981937 100106116 100106116 1 + 212470 SYCP1_ENST00000369518 24 114944339 114944339 114995367 114995367 Inferred Breakpoint 20033038\n")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the TSV file, we're mainly interested in the following columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"SAMPLE_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_SITE")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_HISTOLOGY")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"HISTOLOGY_SUBTYPE_1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"FUSION_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"TRANSLOCATION_NAME")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PUBMED_PMID"))),(0,i.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"For all the histologies and sites, we replace all the underlines with spaces. ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary_gland")," would become ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary gland"),"."))),(0,i.kt)("h4",{id:"aggregation"},"Aggregation"),(0,i.kt)("p",null,"To create the gene fusion entries in Nirvana, we perform the following on each row in the TSV file:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Group all entries by FUSION_ID"),(0,i.kt)("li",{parentName:"ul"},"Using all the entries related to this FUSION_ID:",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"Collect all the PubMed IDs"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of observed sample IDs"),(0,i.kt)("li",{parentName:"ul"},"Grab the HGVS r. 
notation (should not change throughout the FUSION_ID)"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each histology"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each site"))),(0,i.kt)("li",{parentName:"ul"},"Extract the transcript IDs from the HGVS notation and lookup the associated gene symbols")),(0,i.kt)("h4",{id:"fixing-the-hgvs-rna-notation"},"Fixing the HGVS RNA Notation"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"ENST00000360863.6(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452\n")),(0,i.kt)("p",null,"There are some issues with the HGVS RNA notation:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"The two transcripts should be linked by a double colon ",(0,i.kt)("inlineCode",{parentName:"li"},"::"),"."),(0,i.kt)("li",{parentName:"ul"},"For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusion"),(0,i.kt)("li",{parentName:"ul"},"If only the breakpoint is truly known, the recommendation is to use ",(0,i.kt)("inlineCode",{parentName:"li"},"?")," marks")),(0,i.kt)("p",null,"We chose to only update the linkage between each transcript using double colons ",(0,i.kt)("inlineCode",{parentName:"p"},"::"),". While we could have recalculated the HGVS notation using the supplied breakpoints, we chose not to because the resulting notation would be quite different from the original material. This would potentially lead to some confusion."),(0,i.kt)("h4",{id:"aggregating-histologies"},"Aggregating Histologies"),(0,i.kt)("p",null,"For histologies we want to capture the most specific description available. In the example above, we saw that the primary histology was ",(0,i.kt)("inlineCode",{parentName:"p"},"carcinoma"),", but the subtype was ",(0,i.kt)("inlineCode",{parentName:"p"},"ductal carcinoma"),". 
In this case we would use the subtype for the annotation."),(0,i.kt)("p",null,"COSMIC uses ",(0,i.kt)("inlineCode",{parentName:"p"},"NS")," to show that a value is empty. If the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"NS"),", we will use the primary histology instead."),(0,i.kt)("h4",{id:"aggregating-sites"},"Aggregating Sites"),(0,i.kt)("p",null,"For sites, we observe that the subtype provides additional description but is still dependent on the primary site value. For example, the primary site might be ",(0,i.kt)("inlineCode",{parentName:"p"},"skin"),", but the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"foot"),". Therefore, we will combine the values in the following manner: ",(0,i.kt)("inlineCode",{parentName:"p"},"skin (foot)"),"."),(0,i.kt)("h3",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"There are some issues with the HGVS RNA notation:"),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"The two transcripts should be linked by a double colon ",(0,i.kt)("inlineCode",{parentName:"li"},"::"),". We fixed this aspect in Nirvana."),(0,i.kt)("li",{parentName:"ul"},"For coding transcripts, HGVS numbering should use CDS coordinates. 
Right now COSMIC is using cDNA coordinates for all their fusions.")))),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v94/CosmicFusionExport.tsv.gz"},"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v94/CosmicFusionExport.tsv.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v94/CosmicFusionExport.tsv.gz"},"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v94/CosmicFusionExport.tsv.gz"))),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/17896441.2cae488b.js b/assets/js/17896441.2cae488b.js deleted file mode 100644 index d013497a..00000000 --- a/assets/js/17896441.2cae488b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7918],{72319:(e,t,a)=>{a.r(t),a.d(t,{default:()=>W});var n=a(67294),l=a(86010),s=a(93783),i=a(39960),o=a(95999);const r=function(e){const{previous:t,next:a}=e;return n.createElement("nav",{className:"pagination-nav docusaurus-mt-lg","aria-label":(0,o.I)({id:"theme.docs.paginator.navAriaLabel",message:"Docs pages navigation",description:"The ARIA label for the docs pagination"})},n.createElement("div",{className:"pagination-nav__item"},t&&n.createElement(i.Z,{className:"pagination-nav__link",to:t.permalink},n.createElement("div",{className:"pagination-nav__sublabel"},n.createElement(o.Z,{id:"theme.docs.paginator.previous",description:"The label used to navigate to the previous doc"},"Previous")),n.createElement("div",{className:"pagination-nav__label"},"\xab ",t.title))),n.createElement("div",{className:"pagination-nav__item 
pagination-nav__item--next"},a&&n.createElement(i.Z,{className:"pagination-nav__link",to:a.permalink},n.createElement("div",{className:"pagination-nav__sublabel"},n.createElement(o.Z,{id:"theme.docs.paginator.next",description:"The label used to navigate to the next doc"},"Next")),n.createElement("div",{className:"pagination-nav__label"},a.title," \xbb"))))};var c=a(52263),d=a(80907),m=a(53810);const u={unreleased:function(e){let{siteTitle:t,versionMetadata:a}=e;return n.createElement(o.Z,{id:"theme.docs.versions.unreleasedVersionLabel",description:"The label used to tell the user that he's browsing an unreleased doc version",values:{siteTitle:t,versionLabel:n.createElement("b",null,a.label)}},"This is unreleased documentation for {siteTitle} {versionLabel} version.")},unmaintained:function(e){let{siteTitle:t,versionMetadata:a}=e;return n.createElement(o.Z,{id:"theme.docs.versions.unmaintainedVersionLabel",description:"The label used to tell the user that he's browsing an unmaintained doc version",values:{siteTitle:t,versionLabel:n.createElement("b",null,a.label)}},"This is documentation for {siteTitle} {versionLabel}, which is no longer actively maintained.")}};function p(e){const t=u[e.versionMetadata.banner];return n.createElement(t,e)}function v(e){let{versionLabel:t,to:a,onClick:l}=e;return n.createElement(o.Z,{id:"theme.docs.versions.latestVersionSuggestionLabel",description:"The label used to tell the user to check the latest version",values:{versionLabel:t,latestVersionLink:n.createElement("b",null,n.createElement(i.Z,{to:a,onClick:l},n.createElement(o.Z,{id:"theme.docs.versions.latestVersionLinkLabel",description:"The label used for the latest version suggestion link label"},"latest version")))}},"For up-to-date documentation, see the {latestVersionLink} ({versionLabel}).")}function 
g(e){let{className:t,versionMetadata:a}=e;const{siteConfig:{title:s}}=(0,c.Z)(),{pluginId:i}=(0,d.gA)({failfast:!0}),{savePreferredVersionName:o}=(0,m.J)(i),{latestDocSuggestion:r,latestVersionSuggestion:u}=(0,d.Jo)(i),g=r??(h=u).docs.find((e=>e.id===h.mainDocId));var h;return n.createElement("div",{className:(0,l.Z)(t,m.kM.docs.docVersionBanner,"alert alert--warning margin-bottom--md"),role:"alert"},n.createElement("div",null,n.createElement(p,{siteTitle:s,versionMetadata:a})),n.createElement("div",{className:"margin-top--md"},n.createElement(v,{versionLabel:u.label,to:g.path,onClick:()=>o(u.name)})))}function h(e){let{className:t}=e;const a=(0,m.E6)();return a.banner?n.createElement(g,{className:t,versionMetadata:a}):null}function b(e){let{className:t}=e;const a=(0,m.E6)();return a.badge?n.createElement("span",{className:(0,l.Z)(t,m.kM.docs.docVersionBadge,"badge badge--secondary")},"Version: ",a.label):null}var E=a(41217);function N(e){let{lastUpdatedAt:t,formattedLastUpdatedAt:a}=e;return n.createElement(o.Z,{id:"theme.lastUpdated.atDate",description:"The words used to describe on which date a page has been last updated",values:{date:n.createElement("b",null,n.createElement("time",{dateTime:new Date(1e3*t).toISOString()},a))}}," on {date}")}function f(e){let{lastUpdatedBy:t}=e;return n.createElement(o.Z,{id:"theme.lastUpdated.byUser",description:"The words used to describe by who the page has been last updated",values:{user:n.createElement("b",null,t)}}," by {user}")}function _(e){let{lastUpdatedAt:t,formattedLastUpdatedAt:a,lastUpdatedBy:l}=e;return n.createElement("span",{className:m.kM.common.lastUpdated},n.createElement(o.Z,{id:"theme.lastUpdated.lastUpdatedAtBy",description:"The sentence used to display when a page has been last updated, and by who",values:{atDate:t&&a?n.createElement(N,{lastUpdatedAt:t,formattedLastUpdatedAt:a}):"",byUser:l?n.createElement(f,{lastUpdatedBy:l}):""}},"Last updated{atDate}{byUser}"),!1)}var k=a(87462);const 
L="iconEdit_mS5F";const C=function(e){let{className:t,...a}=e;return n.createElement("svg",(0,k.Z)({fill:"currentColor",height:"20",width:"20",viewBox:"0 0 40 40",className:(0,l.Z)(L,t),"aria-hidden":"true"},a),n.createElement("g",null,n.createElement("path",{d:"m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"})))};function Z(e){let{editUrl:t}=e;return n.createElement("a",{href:t,target:"_blank",rel:"noreferrer noopener",className:m.kM.common.editThisPage},n.createElement(C,null),n.createElement(o.Z,{id:"theme.common.editThisPage",description:"The link label to edit the current page"},"Edit this page"))}const U="tag_WK-t",T="tagRegular_LXbV",y="tagWithCount_S5Zl";const w=function(e){const{permalink:t,name:a,count:s}=e;return n.createElement(i.Z,{href:t,className:(0,l.Z)(U,{[T]:!s,[y]:s})},a,s&&n.createElement("span",null,s))},M={tags:"tags_NBRY",tag:"tag_F03v"};function A(e){let{tags:t}=e;return n.createElement(n.Fragment,null,n.createElement("b",null,n.createElement(o.Z,{id:"theme.tags.tagsListLabel",description:"The label alongside a tag list"},"Tags:")),n.createElement("ul",{className:(0,l.Z)(M.tags,"padding--none","margin-left--sm")},t.map((e=>{let{label:t,permalink:a}=e;return n.createElement("li",{key:a,className:M.tag},n.createElement(w,{name:t,permalink:a}))}))))}const x={lastUpdated:"lastUpdated_mt2f"};function H(e){return n.createElement("div",{className:(0,l.Z)(m.kM.docs.docFooterTagsRow,"row margin-bottom--sm")},n.createElement("div",{className:"col"},n.createElement(A,e)))}function B(e){let{editUrl:t,lastUpdatedAt:a,lastUpdatedBy:s,formattedLastUpdatedAt:i}=e;return 
n.createElement("div",{className:(0,l.Z)(m.kM.docs.docFooterEditMetaRow,"row")},n.createElement("div",{className:"col"},t&&n.createElement(Z,{editUrl:t})),n.createElement("div",{className:(0,l.Z)("col",x.lastUpdated)},(a||s)&&n.createElement(_,{lastUpdatedAt:a,formattedLastUpdatedAt:i,lastUpdatedBy:s})))}function S(e){const{content:t}=e,{metadata:a}=t,{editUrl:s,lastUpdatedAt:i,formattedLastUpdatedAt:o,lastUpdatedBy:r,tags:c}=a,d=c.length>0,u=!!(s||i||r);return d||u?n.createElement("footer",{className:(0,l.Z)(m.kM.docs.docFooter,"docusaurus-mt-lg")},d&&n.createElement(H,{tags:c}),u&&n.createElement(B,{editUrl:s,lastUpdatedAt:i,lastUpdatedBy:r,formattedLastUpdatedAt:o})):null}function I(e){let{toc:t,className:a,linkClassName:l,isChild:s}=e;return t.length?n.createElement("ul",{className:s?void 0:a},t.map((e=>n.createElement("li",{key:e.id},n.createElement("a",{href:`#${e.id}`,className:l??void 0,dangerouslySetInnerHTML:{__html:e.value}}),n.createElement(I,{isChild:!0,toc:e.children,className:a,linkClassName:l}))))):null}function V(e){let{toc:t,className:a="table-of-contents table-of-contents__left-border",linkClassName:l="table-of-contents__link",linkActiveClassName:s,minHeadingLevel:i,maxHeadingLevel:o,...r}=e;const c=(0,m.LU)(),d=i??c.tableOfContents.minHeadingLevel,u=o??c.tableOfContents.maxHeadingLevel,p=(0,m.DA)({toc:t,minHeadingLevel:d,maxHeadingLevel:u}),v=(0,n.useMemo)((()=>{if(l&&s)return{linkClassName:l,linkActiveClassName:s,minHeadingLevel:d,maxHeadingLevel:u}}),[l,s,d,u]);return(0,m.Si)(v),n.createElement(I,(0,k.Z)({toc:p,className:a,linkClassName:l},r))}const F="tableOfContents_vrFS";const D=function(e){let{className:t,...a}=e;return n.createElement("div",{className:(0,l.Z)(F,"thin-scrollbar",t)},n.createElement(V,(0,k.Z)({},a,{linkClassName:"table-of-contents__link 
toc-highlight",linkActiveClassName:"table-of-contents__link--active"})))},O={tocCollapsible:"tocCollapsible_aw-L",tocCollapsibleButton:"tocCollapsibleButton_zr6a",tocCollapsibleContent:"tocCollapsibleContent_0dom",tocCollapsibleExpanded:"tocCollapsibleExpanded_FSiv"};function R(e){let{toc:t,className:a,minHeadingLevel:s,maxHeadingLevel:i}=e;const{collapsed:r,toggleCollapsed:c}=(0,m.uR)({initialState:!0});return n.createElement("div",{className:(0,l.Z)(O.tocCollapsible,{[O.tocCollapsibleExpanded]:!r},a)},n.createElement("button",{type:"button",className:(0,l.Z)("clean-btn",O.tocCollapsibleButton),onClick:c},n.createElement(o.Z,{id:"theme.TOCCollapsible.toggleButtonLabel",description:"The label used by the button on the collapsible TOC component"},"On this page")),n.createElement(m.zF,{lazy:!0,className:O.tocCollapsibleContent,collapsed:r},n.createElement(V,{toc:t,minHeadingLevel:s,maxHeadingLevel:i})))}var z=a(39649);const P={docItemContainer:"docItemContainer_oiyr",docItemCol:"docItemCol_zHA2",tocMobile:"tocMobile_Tx6Y"};function W(e){const{content:t}=e,{metadata:a,frontMatter:i}=t,{image:o,keywords:c,hide_title:d,hide_table_of_contents:u,toc_min_heading_level:p,toc_max_heading_level:v}=i,{description:g,title:N}=a,f=!d&&void 0===t.contentTitle,_=(0,s.Z)(),k=!u&&t.toc&&t.toc.length>0,L=k&&("desktop"===_||"ssr"===_);return 
n.createElement(n.Fragment,null,n.createElement(E.Z,{title:N,description:g,keywords:c,image:o}),n.createElement("div",{className:"row"},n.createElement("div",{className:(0,l.Z)("col",{[P.docItemCol]:!u})},n.createElement(h,null),n.createElement("div",{className:P.docItemContainer},n.createElement("article",null,n.createElement(b,null),k&&n.createElement(R,{toc:t.toc,minHeadingLevel:p,maxHeadingLevel:v,className:(0,l.Z)(m.kM.docs.docTocMobile,P.tocMobile)}),n.createElement("div",{className:(0,l.Z)(m.kM.docs.docMarkdown,"markdown")},f&&n.createElement(z.N,null,N),n.createElement(t,null)),n.createElement(S,e)),n.createElement(r,{previous:a.previous,next:a.next}))),L&&n.createElement("div",{className:"col col--3"},n.createElement(D,{toc:t.toc,minHeadingLevel:p,maxHeadingLevel:v,className:m.kM.docs.docTocDesktop}))))}},39649:(e,t,a)=>{a.d(t,{N:()=>d,Z:()=>m});var n=a(87462),l=a(67294),s=a(86010),i=a(95999),o=a(53810);const r="anchorWithStickyNavbar_y2LR",c="anchorWithHideOnScrollNavbar_3ly5",d=e=>{let{...t}=e;return l.createElement("header",null,l.createElement("h1",(0,n.Z)({},t,{id:void 0}),t.children))},m=e=>{return"h1"===e?d:(t=e,e=>{let{id:a,...d}=e;const{navbar:{hideOnScroll:m}}=(0,o.LU)();return a?l.createElement(t,(0,n.Z)({},d,{className:(0,s.Z)("anchor",{[c]:m,[r]:!m}),id:a}),d.children,l.createElement("a",{"aria-hidden":"true",className:"hash-link",href:`#${a}`,title:(0,i.I)({id:"theme.common.headingLinkTitle",message:"Direct link to heading",description:"Title for link to heading"})},"\u200b")):l.createElement(t,d)});var t}}}]); \ No newline at end of file diff --git a/assets/js/17896441.5399554a.js b/assets/js/17896441.5399554a.js new file mode 100644 index 00000000..78e0929b --- /dev/null +++ b/assets/js/17896441.5399554a.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7918],{2319:(e,t,a)=>{a.r(t),a.d(t,{default:()=>W});var n=a(7294),l=a(6010),s=a(3783),i=a(9960),o=a(5999);const 
r=function(e){const{previous:t,next:a}=e;return n.createElement("nav",{className:"pagination-nav docusaurus-mt-lg","aria-label":(0,o.I)({id:"theme.docs.paginator.navAriaLabel",message:"Docs pages navigation",description:"The ARIA label for the docs pagination"})},n.createElement("div",{className:"pagination-nav__item"},t&&n.createElement(i.Z,{className:"pagination-nav__link",to:t.permalink},n.createElement("div",{className:"pagination-nav__sublabel"},n.createElement(o.Z,{id:"theme.docs.paginator.previous",description:"The label used to navigate to the previous doc"},"Previous")),n.createElement("div",{className:"pagination-nav__label"},"\xab ",t.title))),n.createElement("div",{className:"pagination-nav__item pagination-nav__item--next"},a&&n.createElement(i.Z,{className:"pagination-nav__link",to:a.permalink},n.createElement("div",{className:"pagination-nav__sublabel"},n.createElement(o.Z,{id:"theme.docs.paginator.next",description:"The label used to navigate to the next doc"},"Next")),n.createElement("div",{className:"pagination-nav__label"},a.title," \xbb"))))};var c=a(2263),d=a(907),m=a(3810);const u={unreleased:function(e){let{siteTitle:t,versionMetadata:a}=e;return n.createElement(o.Z,{id:"theme.docs.versions.unreleasedVersionLabel",description:"The label used to tell the user that he's browsing an unreleased doc version",values:{siteTitle:t,versionLabel:n.createElement("b",null,a.label)}},"This is unreleased documentation for {siteTitle} {versionLabel} version.")},unmaintained:function(e){let{siteTitle:t,versionMetadata:a}=e;return n.createElement(o.Z,{id:"theme.docs.versions.unmaintainedVersionLabel",description:"The label used to tell the user that he's browsing an unmaintained doc version",values:{siteTitle:t,versionLabel:n.createElement("b",null,a.label)}},"This is documentation for {siteTitle} {versionLabel}, which is no longer actively maintained.")}};function p(e){const t=u[e.versionMetadata.banner];return n.createElement(t,e)}function 
v(e){let{versionLabel:t,to:a,onClick:l}=e;return n.createElement(o.Z,{id:"theme.docs.versions.latestVersionSuggestionLabel",description:"The label used to tell the user to check the latest version",values:{versionLabel:t,latestVersionLink:n.createElement("b",null,n.createElement(i.Z,{to:a,onClick:l},n.createElement(o.Z,{id:"theme.docs.versions.latestVersionLinkLabel",description:"The label used for the latest version suggestion link label"},"latest version")))}},"For up-to-date documentation, see the {latestVersionLink} ({versionLabel}).")}function g(e){let{className:t,versionMetadata:a}=e;const{siteConfig:{title:s}}=(0,c.Z)(),{pluginId:i}=(0,d.gA)({failfast:!0}),{savePreferredVersionName:o}=(0,m.J)(i),{latestDocSuggestion:r,latestVersionSuggestion:u}=(0,d.Jo)(i),g=r??(h=u).docs.find((e=>e.id===h.mainDocId));var h;return n.createElement("div",{className:(0,l.Z)(t,m.kM.docs.docVersionBanner,"alert alert--warning margin-bottom--md"),role:"alert"},n.createElement("div",null,n.createElement(p,{siteTitle:s,versionMetadata:a})),n.createElement("div",{className:"margin-top--md"},n.createElement(v,{versionLabel:u.label,to:g.path,onClick:()=>o(u.name)})))}function h(e){let{className:t}=e;const a=(0,m.E6)();return a.banner?n.createElement(g,{className:t,versionMetadata:a}):null}function b(e){let{className:t}=e;const a=(0,m.E6)();return a.badge?n.createElement("span",{className:(0,l.Z)(t,m.kM.docs.docVersionBadge,"badge badge--secondary")},"Version: ",a.label):null}var E=a(1217);function N(e){let{lastUpdatedAt:t,formattedLastUpdatedAt:a}=e;return n.createElement(o.Z,{id:"theme.lastUpdated.atDate",description:"The words used to describe on which date a page has been last updated",values:{date:n.createElement("b",null,n.createElement("time",{dateTime:new Date(1e3*t).toISOString()},a))}}," on {date}")}function f(e){let{lastUpdatedBy:t}=e;return n.createElement(o.Z,{id:"theme.lastUpdated.byUser",description:"The words used to describe by who the page has been last 
updated",values:{user:n.createElement("b",null,t)}}," by {user}")}function _(e){let{lastUpdatedAt:t,formattedLastUpdatedAt:a,lastUpdatedBy:l}=e;return n.createElement("span",{className:m.kM.common.lastUpdated},n.createElement(o.Z,{id:"theme.lastUpdated.lastUpdatedAtBy",description:"The sentence used to display when a page has been last updated, and by who",values:{atDate:t&&a?n.createElement(N,{lastUpdatedAt:t,formattedLastUpdatedAt:a}):"",byUser:l?n.createElement(f,{lastUpdatedBy:l}):""}},"Last updated{atDate}{byUser}"),!1)}var k=a(7462);const L="iconEdit_mS5F";const C=function(e){let{className:t,...a}=e;return n.createElement("svg",(0,k.Z)({fill:"currentColor",height:"20",width:"20",viewBox:"0 0 40 40",className:(0,l.Z)(L,t),"aria-hidden":"true"},a),n.createElement("g",null,n.createElement("path",{d:"m34.5 11.7l-3 3.1-6.3-6.3 3.1-3q0.5-0.5 1.2-0.5t1.1 0.5l3.9 3.9q0.5 0.4 0.5 1.1t-0.5 1.2z m-29.5 17.1l18.4-18.5 6.3 6.3-18.4 18.4h-6.3v-6.2z"})))};function Z(e){let{editUrl:t}=e;return n.createElement("a",{href:t,target:"_blank",rel:"noreferrer noopener",className:m.kM.common.editThisPage},n.createElement(C,null),n.createElement(o.Z,{id:"theme.common.editThisPage",description:"The link label to edit the current page"},"Edit this page"))}const U="tag_WK-t",T="tagRegular_LXbV",y="tagWithCount_S5Zl";const w=function(e){const{permalink:t,name:a,count:s}=e;return n.createElement(i.Z,{href:t,className:(0,l.Z)(U,{[T]:!s,[y]:s})},a,s&&n.createElement("span",null,s))},M={tags:"tags_NBRY",tag:"tag_F03v"};function A(e){let{tags:t}=e;return n.createElement(n.Fragment,null,n.createElement("b",null,n.createElement(o.Z,{id:"theme.tags.tagsListLabel",description:"The label alongside a tag list"},"Tags:")),n.createElement("ul",{className:(0,l.Z)(M.tags,"padding--none","margin-left--sm")},t.map((e=>{let{label:t,permalink:a}=e;return n.createElement("li",{key:a,className:M.tag},n.createElement(w,{name:t,permalink:a}))}))))}const x={lastUpdated:"lastUpdated_mt2f"};function H(e){return 
n.createElement("div",{className:(0,l.Z)(m.kM.docs.docFooterTagsRow,"row margin-bottom--sm")},n.createElement("div",{className:"col"},n.createElement(A,e)))}function B(e){let{editUrl:t,lastUpdatedAt:a,lastUpdatedBy:s,formattedLastUpdatedAt:i}=e;return n.createElement("div",{className:(0,l.Z)(m.kM.docs.docFooterEditMetaRow,"row")},n.createElement("div",{className:"col"},t&&n.createElement(Z,{editUrl:t})),n.createElement("div",{className:(0,l.Z)("col",x.lastUpdated)},(a||s)&&n.createElement(_,{lastUpdatedAt:a,formattedLastUpdatedAt:i,lastUpdatedBy:s})))}function S(e){const{content:t}=e,{metadata:a}=t,{editUrl:s,lastUpdatedAt:i,formattedLastUpdatedAt:o,lastUpdatedBy:r,tags:c}=a,d=c.length>0,u=!!(s||i||r);return d||u?n.createElement("footer",{className:(0,l.Z)(m.kM.docs.docFooter,"docusaurus-mt-lg")},d&&n.createElement(H,{tags:c}),u&&n.createElement(B,{editUrl:s,lastUpdatedAt:i,lastUpdatedBy:r,formattedLastUpdatedAt:o})):null}function I(e){let{toc:t,className:a,linkClassName:l,isChild:s}=e;return t.length?n.createElement("ul",{className:s?void 0:a},t.map((e=>n.createElement("li",{key:e.id},n.createElement("a",{href:`#${e.id}`,className:l??void 0,dangerouslySetInnerHTML:{__html:e.value}}),n.createElement(I,{isChild:!0,toc:e.children,className:a,linkClassName:l}))))):null}function V(e){let{toc:t,className:a="table-of-contents table-of-contents__left-border",linkClassName:l="table-of-contents__link",linkActiveClassName:s,minHeadingLevel:i,maxHeadingLevel:o,...r}=e;const c=(0,m.LU)(),d=i??c.tableOfContents.minHeadingLevel,u=o??c.tableOfContents.maxHeadingLevel,p=(0,m.DA)({toc:t,minHeadingLevel:d,maxHeadingLevel:u}),v=(0,n.useMemo)((()=>{if(l&&s)return{linkClassName:l,linkActiveClassName:s,minHeadingLevel:d,maxHeadingLevel:u}}),[l,s,d,u]);return(0,m.Si)(v),n.createElement(I,(0,k.Z)({toc:p,className:a,linkClassName:l},r))}const F="tableOfContents_vrFS";const D=function(e){let{className:t,...a}=e;return 
n.createElement("div",{className:(0,l.Z)(F,"thin-scrollbar",t)},n.createElement(V,(0,k.Z)({},a,{linkClassName:"table-of-contents__link toc-highlight",linkActiveClassName:"table-of-contents__link--active"})))},O={tocCollapsible:"tocCollapsible_aw-L",tocCollapsibleButton:"tocCollapsibleButton_zr6a",tocCollapsibleContent:"tocCollapsibleContent_0dom",tocCollapsibleExpanded:"tocCollapsibleExpanded_FSiv"};function R(e){let{toc:t,className:a,minHeadingLevel:s,maxHeadingLevel:i}=e;const{collapsed:r,toggleCollapsed:c}=(0,m.uR)({initialState:!0});return n.createElement("div",{className:(0,l.Z)(O.tocCollapsible,{[O.tocCollapsibleExpanded]:!r},a)},n.createElement("button",{type:"button",className:(0,l.Z)("clean-btn",O.tocCollapsibleButton),onClick:c},n.createElement(o.Z,{id:"theme.TOCCollapsible.toggleButtonLabel",description:"The label used by the button on the collapsible TOC component"},"On this page")),n.createElement(m.zF,{lazy:!0,className:O.tocCollapsibleContent,collapsed:r},n.createElement(V,{toc:t,minHeadingLevel:s,maxHeadingLevel:i})))}var z=a(9649);const P={docItemContainer:"docItemContainer_oiyr",docItemCol:"docItemCol_zHA2",tocMobile:"tocMobile_Tx6Y"};function W(e){const{content:t}=e,{metadata:a,frontMatter:i}=t,{image:o,keywords:c,hide_title:d,hide_table_of_contents:u,toc_min_heading_level:p,toc_max_heading_level:v}=i,{description:g,title:N}=a,f=!d&&void 0===t.contentTitle,_=(0,s.Z)(),k=!u&&t.toc&&t.toc.length>0,L=k&&("desktop"===_||"ssr"===_);return 
n.createElement(n.Fragment,null,n.createElement(E.Z,{title:N,description:g,keywords:c,image:o}),n.createElement("div",{className:"row"},n.createElement("div",{className:(0,l.Z)("col",{[P.docItemCol]:!u})},n.createElement(h,null),n.createElement("div",{className:P.docItemContainer},n.createElement("article",null,n.createElement(b,null),k&&n.createElement(R,{toc:t.toc,minHeadingLevel:p,maxHeadingLevel:v,className:(0,l.Z)(m.kM.docs.docTocMobile,P.tocMobile)}),n.createElement("div",{className:(0,l.Z)(m.kM.docs.docMarkdown,"markdown")},f&&n.createElement(z.N,null,N),n.createElement(t,null)),n.createElement(S,e)),n.createElement(r,{previous:a.previous,next:a.next}))),L&&n.createElement("div",{className:"col col--3"},n.createElement(D,{toc:t.toc,minHeadingLevel:p,maxHeadingLevel:v,className:m.kM.docs.docTocDesktop}))))}},9649:(e,t,a)=>{a.d(t,{N:()=>d,Z:()=>m});var n=a(7462),l=a(7294),s=a(6010),i=a(5999),o=a(3810);const r="anchorWithStickyNavbar_y2LR",c="anchorWithHideOnScrollNavbar_3ly5",d=e=>{let{...t}=e;return l.createElement("header",null,l.createElement("h1",(0,n.Z)({},t,{id:void 0}),t.children))},m=e=>{return"h1"===e?d:(t=e,e=>{let{id:a,...d}=e;const{navbar:{hideOnScroll:m}}=(0,o.LU)();return a?l.createElement(t,(0,n.Z)({},d,{className:(0,s.Z)("anchor",{[c]:m,[r]:!m}),id:a}),d.children,l.createElement("a",{"aria-hidden":"true",className:"hash-link",href:`#${a}`,title:(0,i.I)({id:"theme.common.headingLinkTitle",message:"Direct link to heading",description:"Title for link to heading"})},"\u200b")):l.createElement(t,d)});var t}}}]); \ No newline at end of file diff --git a/assets/js/178fb8fa.237a5fcb.js b/assets/js/178fb8fa.237a5fcb.js deleted file mode 100644 index b87a2881..00000000 --- a/assets/js/178fb8fa.237a5fcb.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7416],{3905:(a,e,t)=>{t.d(e,{Zo:()=>d,kt:()=>A});var n=t(67294);function i(a,e,t){return e in 
a?Object.defineProperty(a,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):a[e]=t,a}function r(a,e){var t=Object.keys(a);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(a);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(a,e).enumerable}))),t.push.apply(t,n)}return t}function c(a){for(var e=1;e=0||(i[t]=a[t]);return i}(a,e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(a);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(a,t)&&(i[t]=a[t])}return i}var s=n.createContext({}),l=function(a){var e=n.useContext(s),t=e;return a&&(t="function"==typeof a?a(e):c(c({},e),a)),t},d=function(a){var e=l(a.components);return n.createElement(s.Provider,{value:e},a.children)},m="mdxType",u={inlineCode:"code",wrapper:function(a){var e=a.children;return n.createElement(n.Fragment,{},e)}},p=n.forwardRef((function(a,e){var t=a.components,i=a.mdxType,r=a.originalType,s=a.parentName,d=o(a,["components","mdxType","originalType","parentName"]),m=l(t),p=i,A=m["".concat(s,".").concat(p)]||m[p]||u[p]||r;return t?n.createElement(A,c(c({ref:e},d),{},{components:t})):n.createElement(A,c({ref:e},d))}));function A(a,e){var t=arguments,i=e&&e.mdxType;if("string"==typeof a||i){var r=t.length,c=new Array(r);c[0]=p;var o={};for(var s in e)hasOwnProperty.call(e,s)&&(o[s]=e[s]);o.originalType=a,o[m]="string"==typeof a?a:i,c[1]=o;for(var l=2;l{t.r(e),t.d(e,{contentTitle:()=>c,default:()=>m,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var n=t(87462),i=(t(67294),t(3905));const r={title:"Cancer Hotspots"},c=void 0,o={unversionedId:"data-sources/cancer-hotspots",id:"version-3.21/data-sources/cancer-hotspots",title:"Cancer 
Hotspots",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/cancer-hotspots.mdx",sourceDirName:"data-sources",slug:"/data-sources/cancer-hotspots",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cancer-hotspots",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/cancer-hotspots.mdx",tags:[],version:"3.21",frontMatter:{title:"Cancer Hotspots"},sidebar:"docs",previous:{title:"Amino Acid Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/amino-acid-conservation"},next:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Data extraction",id:"data-extraction",children:[{value:"Example",id:"example",children:[{value:"SNV",id:"snv",children:[],level:4},{value:"Indel",id:"indel",children:[],level:4}],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],l={toc:s},d="wrapper";function m(a){let{components:e,...t}=a;return(0,i.kt)(d,(0,n.Z)({},l,t,{components:e,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Cancer Hotspots, a resource for statistically significant mutations in cancer. 
It provides information about statistically significantly recurrent mutations identified in large scale cancer genomics data."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Chang MT, Bhattarai TS, Schram AM, Bielski CM, Donoghue MTA, Jonsson P, Chakravarty D, Phillips S, Kandoth C, Penson A, Gorelick A, Shamu T, Patel S, Harris C, Gao J, Sumer SO, Kundra R, Razavi P, Li BT, Reales DN, Socci ND, Jayakumaran G, Zehir A, Benayed R, Arcila ME, Chandarlapaty S, Ladanyi M, Schultz N, Baselga J, Berger MF, Rosen N, Solit DB, Hyman DM, Taylor BS. Accelerating Discovery of Functional Mutant Alleles in Cancer. Cancer Discov. 2018 Feb;8(2):174-183. doi: 10.1158/2159-8290.CD-17-0321. Epub 2017 Dec 15. PMID: 29247016; PMCID: PMC5809279."),(0,i.kt)("p",{parentName:"div"},"Chang MT, Asthana S, Gao SP, Lee BH, Chapman JS, Kandoth C, Gao J, Socci ND, Solit DB, Olshen AB, Schultz N, Taylor BS. Identifying recurrent mutations in cancer reveals widespread lineage diversity and mutational specificity. Nat Biotechnol. 2016 Feb;34(2):155-63. doi: 10.1038/nbt.3391. Epub 2015 Nov 30. 
PMID: 26619011; PMCID: PMC4744099."))),(0,i.kt)("h2",{id:"data-extraction"},"Data extraction"),(0,i.kt)("p",null,"Nirvana currently parses SNV and indel tabs from hotspots_v2.xls file to extract the relevant content."),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("h4",{id:"snv"},"SNV"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},'Hugo_Symbol Amino_Acid_Position log10_pvalue Mutation_Count Reference_Amino_Acid Total_Mutations_in_Gene Median_Allele_Freq_Rank Allele_Freq_Rank Variant_Amino_Acid Codon_Change Genomic_Position Detailed_Cancer_Types Organ_Types Tri-nucleotides Mutability mu_protein Total_Samples Analysis_Type qvalue tm qvalue_pancanIs_repeat seq length align100 pad12entropy pad24entropy pad36entropy TP reason n_MSK n_Retro judgement inNBT inOncokb ref qvaluect ct Samples\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.7142857142857
14:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.0620767494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.1818181818181
82:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 R:204 cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 
"pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:88|thyroid:54|blood:15|bowel:8|testis:5|biliarytract:4|bladder:4|lung:4|ovaryfallopiantube:4|softtissue:3|unk:3|uterus:3|cnsbrain:2|esophagusstomach:2|headandneck:2|bone:1|pancreas:1|thymus:1\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.3333333333333
33:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.0620767494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.44
8051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 K:142 cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin 
skin:62|bowel:18|thyroid:17|blood:12|softtissue:6|lung:5|unk:5|bladder:3|cnsbrain:2|thymus:2|adrenalgland:1|biliarytract:1|esophagusstomach:1|headandneck:1|kidney:1|liver:1|ovaryfallopiantube:1|pancreas:1|testis:1|uterus:1\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.1137
61467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.0620767494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.44556962
0253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 L:46 cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:24|bowel:7|lung:6|blood:2|cnsbrain:2|unk:2|bladder:1|softtissue:1|uterus:1\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 
295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.062076
7494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 H:27 
cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:12|blood:7|bowel:2|lung:2|testis:2|softtissue:1|unk:1\n')),(0,i.kt)("h4",{id:"indel"},"Indel"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"Hugo_Symbol Amino_Acid_Position log10_pvalue Mutation_Count Reference_Amino_Acid Total_Mutations_in_Gene Median_Allele_Freq_Rank Allele_Freq_Rank SNP_ID Variant_Amino_Acid Codon_Change Genomic_Position Detailed_Cancer_Types Organ_Types Tri-nucleotides Mutability mu_protein ccf Total_Samples indel_size qvalue tm Is_repeat seq length align100 pad12entropy pad24entropy pad36entropy TP reason n_MSK n_Retro judgement inNBT inOncokb Samples\nSMARCA4 546 -7.75235638169585 5 QK:5 101 
NA NA :NA K546del:5 cAGAag/cag:5 19:11106926_5 lgg:536:4|dlbcl:246:1 cnsbrain:2283:4|lymph:366:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 1 0.000230672905611517 SMARCA4 546 FALSE NA NA 1 0.91489630957268 1.2950060272429 1.33965330506364 FALSE LOCAL_ENTROPY 1 4 RETAIN FALSE FALSE cnsbrain:4|lymph:1\nCDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA V28_E33del:4 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 1 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE cervix:1|esophagusstomach:1|lung:1|pancreas:1\nCDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA L32_L37del:3 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 1 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE skin:2|esophagusstomach:1\nCDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA 
A36_N39delinsD:1 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE lung:1\n")),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the file, we're mainly interested in the following columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Hugo_Symbol")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Amino_Acid_Position")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Mutation_Count")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Reference_Amino_Acid")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Variant_Amino_Acid")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"qvalue"))),(0,i.kt)("p",null,"We map the gene symbol onto the canonical transcripts (RefSeq & Ensembl) for that gene. For SNVs, we obtain position, ref and alt amino acid from source file and generate substitution notation. 
For indels, we get protein change notation from ",(0,i.kt)("inlineCode",{parentName:"p"},"Reference_Amino_Acid")," column.\nThen we match each entry using these notations."),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"We currently skip all variants labeled as splice from the source"))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("p",null,"The data source will be captured under the cancerHotspots key in the transcript section."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{13-18}","{13-18}":!0},'{\n "transcript":"NM_002524.5",\n "source":"RefSeq",\n "bioType":"mRNA",\n "aminoAcids":"Q/K",\n "proteinPos":"61",\n "geneId":"4893",\n "hgnc":"NRAS",\n "hgvsc":"NM_002524.5:c.181C>A",\n "hgvsp":"NP_002515.1:p.(Gln61Lys)",\n "isCanonical":true,\n "proteinId":"NP_002515.1",\n "cancerHotspots":{\n "residue":"Q61",\n "numSamples":422,\n "numAltAminoAcidSamples":142,\n "qValue":0\n 
}\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"residue"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant at the same amino acid position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numAltAminoAcidSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant with the same position and alternate amino acid position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"qValue"),(0,i.kt)("td",{parentName:"tr",align:"center"},"double"),(0,i.kt)("td",{parentName:"tr",align:"left"})))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/17efe0eb.423048b5.js b/assets/js/17efe0eb.423048b5.js deleted file mode 100644 index c459cf41..00000000 --- a/assets/js/17efe0eb.423048b5.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7901,9151,3425],{3905:(t,e,n)=>{n.d(e,{Zo:()=>p,kt:()=>N});var a=n(67294);function l(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function r(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return 
Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(l[n]=t[n]);return l}(t,e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(l[n]=t[n])}return l}var u=a.createContext({}),m=function(t){var e=a.useContext(u),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},p=function(t){var e=m(t.components);return a.createElement(u.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},g=a.forwardRef((function(t,e){var n=t.components,l=t.mdxType,r=t.originalType,u=t.parentName,p=i(t,["components","mdxType","originalType","parentName"]),s=m(n),g=l,N=s["".concat(u,".").concat(g)]||s[g]||d[g]||r;return n?a.createElement(N,o(o({ref:e},p),{},{components:n})):a.createElement(N,o({ref:e},p))}));function N(t,e){var n=arguments,l=e&&e.mdxType;if("string"==typeof t||l){var r=n.length,o=new Array(r);o[0]=g;var i={};for(var u in e)hasOwnProperty.call(e,u)&&(i[u]=e[u]);i.originalType=t,i[s]="string"==typeof t?t:l,o[1]=i;for(var m=2;m{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>r,metadata:()=>i,toc:()=>u});var a=n(87462),l=(n(67294),n(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-exomes-small-variants-json",id:"version-3.2.5/data-sources/gnomad-exomes-small-variants-json",title:"gnomad-exomes-small-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.2.5/data-sources/gnomad-exomes-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-exomes-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/gnomad-exomes-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/gnomad-exomes-small-variants-json.md",tags:[],version:"3.2.5",frontMatter:{}},u=[],m={toc:u},p="wrapper";function s(t){let{components:e,...n}=t;return(0,l.kt)(p,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomadExome":{ \n "coverage":20,\n "allAf":0.190317,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer 
values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")))))}s.isMDXComponent=!0},64501:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>r,metadata:()=>i,toc:()=>u});var a=n(87462),l=(n(67294),n(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-genomes-small-variants-json",id:"version-3.2.5/data-sources/gnomad-genomes-small-variants-json",title:"gnomad-genomes-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/gnomad-genomes-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-genomes-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/gnomad-genomes-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/gnomad-genomes-small-variants-json.md",tags:[],version:"3.2.5",frontMatter:{}},u=[],m={toc:u},p="wrapper";function s(t){let{components:e,...n}=t;return(0,l.kt)(p,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n 
"failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. 
Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. 
Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")))))}s.isMDXComponent=!0},94285:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>u,default:()=>g,frontMatter:()=>i,metadata:()=>m,toc:()=>p});var a=n(87462),l=(n(67294),n(3905)),r=n(64501),o=n(92811);const i={title:"gnomAD"},u=void 0,m={unversionedId:"data-sources/gnomad",id:"version-3.2.5/data-sources/gnomad",title:"gnomAD",description:"Overview",source:"@site/versioned_docs/version-3.2.5/data-sources/gnomad.mdx",sourceDirName:"data-sources",slug:"/data-sources/gnomad",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/gnomad",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/gnomad.mdx",tags:[],version:"3.2.5",frontMatter:{title:"gnomAD"},sidebar:"version-3.2.5/docs",previous:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/dbsnp"},next:{title:"Nirvana JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/file-formats/nirvana-json-file-format"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF extraction",id:"vcf-extraction",children:[],level:3},{value:"Computation",id:"computation",children:[],level:3},{value:"VCF download instructions",id:"vcf-download-instructions",children:[],level:3},{value:"JSON output",id:"json-output",children:[{value:"Genomes",id:"genomes",children:[],level:4},{value:"Exomes",id:"exomes",children:[],level:4}],level:3}],level:2}],s={toc:p},d="wrapper";function g(t){let{components:e,...n}=t;return(0,l.kt)(d,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"The 
Genome Aggregation Database (",(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/"},"gnomAD"),") is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community."),(0,l.kt)("h2",{id:"small-variants"},"Small Variants"),(0,l.kt)("h3",{id:"vcf-extraction"},"VCF extraction"),(0,l.kt)("p",null,"We currently extract the following info fields from gnomAD genome and exome VCF files:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("p",null,"We also extract the following extra fields from gnomAD exome VCF file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("h3",{id:"computation"},"Computation"),(0,l.kt)("p",null,"Using these, we compute the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Coverage"),(0,l.kt)("li",{parentName:"ul"},"Allele count, Homozygous count, allele number and allele frequencies for:",(0,l.kt)("ul",{parentName:"li"},(0,l.kt)("li",{parentName:"ul"},"Global population"),(0,l.kt)("li",{parentName:"ul"},"African/African Americans"),(0,l.kt)("li",{parentName:"ul"},"Admixed Americans"),(0,l.kt)("li",{parentName:"ul"},"Ashkenazi Jews"),(0,l.kt)("li",{parentName:"ul"},"East Asians"),(0,l.kt)("li",{parentName:"ul"},"Finnish"),(0,l.kt)("li",{parentName:"ul"},"Non-Finnish Europeans"),(0,l.kt)("li",{parentName:"ul"},"South Asian"),(0,l.kt)("li",{parentName:"ul"},"Others (population not 
assigned)"),(0,l.kt)("li",{parentName:"ul"},"Male"),(0,l.kt)("li",{parentName:"ul"},"Female"),(0,l.kt)("li",{parentName:"ul"},"Controls")))),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Coverage = DP / AN. 
Frequencies are computed using AC/AN for each population."),(0,l.kt)("li",{parentName:"ul"},"Please note that currently there is no genome sequencing data of south asian (SAS) population available in gnomAD."),(0,l.kt)("li",{parentName:"ul"},"Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.")))),(0,l.kt)("h3",{id:"vcf-download-instructions"},"VCF download instructions"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/downloads"},"https://gnomad.broadinstitute.org/downloads")),(0,l.kt)("h3",{id:"json-output"},"JSON output"),(0,l.kt)("p",null,"Genome and exome allele frequencies are provided in separate JSON sections."),(0,l.kt)("h4",{id:"genomes"},"Genomes"),(0,l.kt)(r.default,{mdxType:"GnomadGenomes"}),(0,l.kt)("h4",{id:"exomes"},"Exomes"),(0,l.kt)(o.default,{mdxType:"GnomadExomes"}))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/18946b76.60b3e406.js b/assets/js/18946b76.60b3e406.js deleted file mode 100644 index 52c110e6..00000000 --- a/assets/js/18946b76.60b3e406.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3305,2630],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>h});var a=n(67294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof 
e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),m=d(n),u=l,h=m["".concat(s,".").concat(u)]||m[u]||p[u]||r;return n?a.createElement(h,i(i({ref:t},c),{},{components:n})):a.createElement(h,i({ref:t},c))}));function h(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,i=new Array(r);i[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:l,i[1]=o;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dbsnp-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},c="wrapper";function m(e){let{components:t,...n}=e;return(0,l.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n 
"rs1042821"\n]\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,l.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,l.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,l.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}m.isMDXComponent=!0},54266:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),l=(n(67294),n(3905)),r=n(39156);const i={title:"dbSNP"},o=void 0,s={unversionedId:"data-sources/dbsnp",id:"data-sources/dbsnp",title:"dbSNP",description:"Overview",source:"@site/docs/data-sources/dbsnp.mdx",sourceDirName:"data-sources",slug:"/data-sources/dbsnp",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dbsnp.mdx",tags:[],version:"current",frontMatter:{title:"dbSNP"},sidebar:"docs",previous:{title:"DANN",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann"},next:{title:"DECIPHER",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Global allele extraction",id:"global-allele-extraction",children:[],level:4},{value:"Equal Allele Frequency Example (2 alleles)",id:"equal-allele-frequency-example-2-alleles",children:[],level:4},{value:"Equal Allele Frequency Example (3 alleles)",id:"equal-allele-frequency-example-3-alleles",children:[],level:4},{value:"Equal Allele Frequency in Alternate 
Alleles",id:"equal-allele-frequency-in-alternate-alleles",children:[],level:4},{value:"Equal Allele Frequency Between Reference & Alternate Allele",id:"equal-allele-frequency-between-reference--alternate-allele",children:[],level:4}],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},m="wrapper";function p(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"dbSNP contains human single nucleotide variations, microsatellites, and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP\u2014Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. 
",(0,l.kt)("em",{parentName:"p"},"Genome Res."),", ",(0,l.kt)("strong",{parentName:"p"},"9"),", 677\u2013679."))),(0,l.kt)("h2",{id:"vcf-file"},"VCF File"),(0,l.kt)("h3",{id:"example"},"Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 10177 rs367896724 A AC . . RS=367896724;RSPOS=10177;dbSNPBuildID=138; \\ \n SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \\\n VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \\\n TOPMED=0.76728147298674821,0.23271852701325178\n")),(0,l.kt)("h3",{id:"parsing"},"Parsing"),(0,l.kt)("p",null,"From the VCF file, we're mainly interested in the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"rsID")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"ID")," field"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"CAF")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"INFO")," field")),(0,l.kt)("h4",{id:"global-allele-extraction"},"Global allele extraction"),(0,l.kt)("p",null,"The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values). 
"),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: Global Major Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele."))),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: 
Global Minor Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily."))),(0,l.kt)("h4",{id:"equal-allele-frequency-example-2-alleles"},"Equal Allele Frequency Example (2 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C CAF=0.5,0.5\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and C to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-example-3-alleles"},"Equal Allele Frequency Example (3 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.33,0.33,0.33\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-in-alternate-alleles"},"Equal Allele Frequency in Alternate Alleles"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.4,0.4\n")),(0,l.kt)("p",null,"We will select C or T to be arbitrarily assigned to be the global major or global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-between-reference--alternate-allele"},"Equal Allele Frequency Between Reference & Alternate Allele"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.2,0.6\n")),(0,l.kt)("p",null,"We will select T to be the global major allele and C to be the global minor allele."),(0,l.kt)("h2",{id:"known-issues"},"Known Issues"),(0,l.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are multiple entries with different CAF values for the same allele, we use the first CAF value."))),(0,l.kt)("h2",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nih.gov/snp/organisms/"},"https://ftp.ncbi.nih.gov/snp/organisms/")),(0,l.kt)("h2",{id:"json-output"},"JSON Output"),(0,l.kt)(r.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/18946b76.a4c3386a.js b/assets/js/18946b76.a4c3386a.js new file mode 100644 index 00000000..2c58b6ff --- /dev/null +++ b/assets/js/18946b76.a4c3386a.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3305,2630],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>h});var a=n(7294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var 
r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),m=d(n),u=l,h=m["".concat(s,".").concat(u)]||m[u]||p[u]||r;return n?a.createElement(h,i(i({ref:t},c),{},{components:n})):a.createElement(h,i({ref:t},c))}));function h(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,i=new Array(r);i[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:l,i[1]=o;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(7462),l=(n(7294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dbsnp-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},c="wrapper";function m(e){let{components:t,...n}=e;return(0,l.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n 
"rs1042821"\n]\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,l.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,l.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,l.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}m.isMDXComponent=!0},4266:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(7462),l=(n(7294),n(3905)),r=n(9156);const i={title:"dbSNP"},o=void 0,s={unversionedId:"data-sources/dbsnp",id:"data-sources/dbsnp",title:"dbSNP",description:"Overview",source:"@site/docs/data-sources/dbsnp.mdx",sourceDirName:"data-sources",slug:"/data-sources/dbsnp",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dbsnp.mdx",tags:[],version:"current",frontMatter:{title:"dbSNP"},sidebar:"docs",previous:{title:"DANN",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann"},next:{title:"DECIPHER",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Global allele extraction",id:"global-allele-extraction",children:[],level:4},{value:"Equal Allele Frequency Example (2 alleles)",id:"equal-allele-frequency-example-2-alleles",children:[],level:4},{value:"Equal Allele Frequency Example (3 alleles)",id:"equal-allele-frequency-example-3-alleles",children:[],level:4},{value:"Equal Allele Frequency in Alternate 
Alleles",id:"equal-allele-frequency-in-alternate-alleles",children:[],level:4},{value:"Equal Allele Frequency Between Reference & Alternate Allele",id:"equal-allele-frequency-between-reference--alternate-allele",children:[],level:4}],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},m="wrapper";function p(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"dbSNP contains human single nucleotide variations, microsatellites, and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP\u2014Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. 
",(0,l.kt)("em",{parentName:"p"},"Genome Res."),", ",(0,l.kt)("strong",{parentName:"p"},"9"),", 677\u2013679."))),(0,l.kt)("h2",{id:"vcf-file"},"VCF File"),(0,l.kt)("h3",{id:"example"},"Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 10177 rs367896724 A AC . . RS=367896724;RSPOS=10177;dbSNPBuildID=138; \\ \n SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \\\n VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \\\n TOPMED=0.76728147298674821,0.23271852701325178\n")),(0,l.kt)("h3",{id:"parsing"},"Parsing"),(0,l.kt)("p",null,"From the VCF file, we're mainly interested in the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"rsID")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"ID")," field"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"CAF")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"INFO")," field")),(0,l.kt)("h4",{id:"global-allele-extraction"},"Global allele extraction"),(0,l.kt)("p",null,"The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values). 
"),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: Global Major Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele."))),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: 
Global Minor Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily."))),(0,l.kt)("h4",{id:"equal-allele-frequency-example-2-alleles"},"Equal Allele Frequency Example (2 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C CAF=0.5,0.5\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and C to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-example-3-alleles"},"Equal Allele Frequency Example (3 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.33,0.33,0.33\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-in-alternate-alleles"},"Equal Allele Frequency in Alternate Alleles"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.4,0.4\n")),(0,l.kt)("p",null,"We will select C or T to be arbitrarily assigned to be the global major or global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-between-reference--alternate-allele"},"Equal Allele Frequency Between Reference & Alternate Allele"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.2,0.6\n")),(0,l.kt)("p",null,"We will select T to be the global major allele and C to be the global minor allele."),(0,l.kt)("h2",{id:"known-issues"},"Known Issues"),(0,l.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are multiple entries with different CAF values for the same allele, we use the first CAF value."))),(0,l.kt)("h2",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nih.gov/snp/organisms/"},"https://ftp.ncbi.nih.gov/snp/organisms/")),(0,l.kt)("h2",{id:"json-output"},"JSON Output"),(0,l.kt)(r.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/18b93cb3.3625892b.js b/assets/js/18b93cb3.3625892b.js deleted file mode 100644 index 96727437..00000000 --- a/assets/js/18b93cb3.3625892b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3042],{40351:(e,t,n)=>{n.r(t),n.d(t,{default:()=>o});var l=n(67294),a=n(52263),r=n(39960),m=n(18882),c=n(80907);const o=function(){const{siteConfig:e}=(0,a.Z)(),t=(0,c.gB)(),n=(0,c.yW)(),o=t.find((e=>"current"===e.name)),i=t.filter((e=>e!==n&&"current"!==e.name)),u=i.shift(),s="https://github.com/Illumina/Nirvana";return l.createElement(m.Z,{title:"Versions",permalink:"/versions",description:"Nirvana Versions page listing all documented site versions"},l.createElement("main",{className:"container 
margin-vert--lg"},l.createElement("h1",null,"Nirvana documentation versions"),u&&l.createElement("div",{className:"margin-bottom--lg"},l.createElement("h3",{id:"next"},"Current version (Stable)"),l.createElement("p",null,"Here you can find the documentation for current released version."),l.createElement("table",null,l.createElement("tbody",null,l.createElement("tr",null,l.createElement("th",null,u.name),l.createElement("td",null,l.createElement(r.Z,{to:u.path},"Documentation")),l.createElement("td",null,l.createElement("a",{href:`${s}/releases/tag/v${u.name}.0`},"Release Notes")))))),l.createElement("div",{className:"margin-bottom--lg"},l.createElement("h3",{id:"latest"},"Next version (Unreleased)"),l.createElement("p",null,"Here you can find the documentation for unreleased version currently in development."),l.createElement("table",null,l.createElement("tbody",null,l.createElement("tr",null,l.createElement("th",null,n.label),l.createElement("td",null,l.createElement(r.Z,{to:n.path},"Documentation")))))),o!==n&&l.createElement("div",{className:"margin-bottom--lg"},l.createElement("h3",{id:"next"},"Next version (Unreleased)"),l.createElement("p",null,"Here you can find the documentation for unreleased version."),l.createElement("table",null,l.createElement("tbody",null,l.createElement("tr",null,l.createElement("th",null,"master"),l.createElement("td",null,l.createElement(r.Z,{to:o.path},"Documentation")),l.createElement("td",null,l.createElement("a",{href:s},"Source Code")))))),i.length>0&&l.createElement("div",{className:"margin-bottom--lg"},l.createElement("h3",{id:"archive"},"Past versions"),l.createElement("p",null,"Here you can find documentation for previous versions of 
Nirvana."),l.createElement("table",null,l.createElement("tbody",null,i.map((e=>l.createElement("tr",{key:e.name},l.createElement("th",null,e.label),l.createElement("td",null,l.createElement(r.Z,{to:e.path},"Documentation")),l.createElement("td",null,l.createElement("a",{href:`${s}/releases/tag/v${e.name}.0`},"Release Notes"))))))))))}}}]); \ No newline at end of file diff --git a/assets/js/18b93cb3.53a9b2af.js b/assets/js/18b93cb3.53a9b2af.js new file mode 100644 index 00000000..1700ad10 --- /dev/null +++ b/assets/js/18b93cb3.53a9b2af.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3042],{351:(e,t,n)=>{n.r(t),n.d(t,{default:()=>o});var l=n(7294),a=n(2263),r=n(9960),m=n(8882),c=n(907);const o=function(){const{siteConfig:e}=(0,a.Z)(),t=(0,c.gB)(),n=(0,c.yW)(),o=t.find((e=>"current"===e.name)),i=t.filter((e=>e!==n&&"current"!==e.name)),u=i.shift(),s="https://github.com/Illumina/Nirvana";return l.createElement(m.Z,{title:"Versions",permalink:"/versions",description:"Nirvana Versions page listing all documented site versions"},l.createElement("main",{className:"container margin-vert--lg"},l.createElement("h1",null,"Nirvana documentation versions"),u&&l.createElement("div",{className:"margin-bottom--lg"},l.createElement("h3",{id:"next"},"Current version (Stable)"),l.createElement("p",null,"Here you can find the documentation for current released version."),l.createElement("table",null,l.createElement("tbody",null,l.createElement("tr",null,l.createElement("th",null,u.name),l.createElement("td",null,l.createElement(r.Z,{to:u.path},"Documentation")),l.createElement("td",null,l.createElement("a",{href:`${s}/releases/tag/v${u.name}.0`},"Release Notes")))))),l.createElement("div",{className:"margin-bottom--lg"},l.createElement("h3",{id:"latest"},"Next version (Unreleased)"),l.createElement("p",null,"Here you can find the documentation for unreleased version currently in 
development."),l.createElement("table",null,l.createElement("tbody",null,l.createElement("tr",null,l.createElement("th",null,n.label),l.createElement("td",null,l.createElement(r.Z,{to:n.path},"Documentation")))))),o!==n&&l.createElement("div",{className:"margin-bottom--lg"},l.createElement("h3",{id:"next"},"Next version (Unreleased)"),l.createElement("p",null,"Here you can find the documentation for unreleased version."),l.createElement("table",null,l.createElement("tbody",null,l.createElement("tr",null,l.createElement("th",null,"master"),l.createElement("td",null,l.createElement(r.Z,{to:o.path},"Documentation")),l.createElement("td",null,l.createElement("a",{href:s},"Source Code")))))),i.length>0&&l.createElement("div",{className:"margin-bottom--lg"},l.createElement("h3",{id:"archive"},"Past versions"),l.createElement("p",null,"Here you can find documentation for previous versions of Nirvana."),l.createElement("table",null,l.createElement("tbody",null,i.map((e=>l.createElement("tr",{key:e.name},l.createElement("th",null,e.label),l.createElement("td",null,l.createElement(r.Z,{to:e.path},"Documentation")),l.createElement("td",null,l.createElement("a",{href:`${s}/releases/tag/v${e.name}.0`},"Release Notes"))))))))))}}}]); \ No newline at end of file diff --git a/assets/js/191d3c1c.01ee261e.js b/assets/js/191d3c1c.01ee261e.js deleted file mode 100644 index 8439af5d..00000000 --- a/assets/js/191d3c1c.01ee261e.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4899],{3905:(t,e,n)=>{n.d(e,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function i(t){for(var 
e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var l=r.createContext({}),p=function(t){var e=r.useContext(l),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},s=function(t){var e=p(t.components);return r.createElement(l.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,o=t.originalType,l=t.parentName,s=c(t,["components","mdxType","originalType","parentName"]),d=p(n),u=a,f=d["".concat(l,".").concat(u)]||d[u]||m[u]||o;return n?r.createElement(f,i(i({ref:e},s),{},{components:n})):r.createElement(f,i({ref:e},s))}));function f(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=n.length,i=new Array(o);i[0]=u;var c={};for(var l in e)hasOwnProperty.call(e,l)&&(c[l]=e[l]);c.originalType=t,c[d]="string"==typeof t?t:a,i[1]=c;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/splice-ai-json",id:"data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/splice-ai-json.md",tags:[],version:"current",frontMatter:{}},l=[],p={toc:l},s="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n 
"acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/191d3c1c.0a96c1ed.js b/assets/js/191d3c1c.0a96c1ed.js new file mode 100644 index 00000000..4dd72774 --- /dev/null +++ b/assets/js/191d3c1c.0a96c1ed.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4899],{3905:(t,e,n)=>{n.d(e,{Zo:()=>s,kt:()=>f});var r=n(7294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function i(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var l=r.createContext({}),p=function(t){var e=r.useContext(l),n=e;return 
t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},s=function(t){var e=p(t.components);return r.createElement(l.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,o=t.originalType,l=t.parentName,s=c(t,["components","mdxType","originalType","parentName"]),d=p(n),u=a,f=d["".concat(l,".").concat(u)]||d[u]||m[u]||o;return n?r.createElement(f,i(i({ref:e},s),{},{components:n})):r.createElement(f,i({ref:e},s))}));function f(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=n.length,i=new Array(o);i[0]=u;var c={};for(var l in e)hasOwnProperty.call(e,l)&&(c[l]=e[l]);c.originalType=t,c[d]="string"==typeof t?t:a,i[1]=c;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(7462),a=(n(7294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/splice-ai-json",id:"data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/splice-ai-json.md",tags:[],version:"current",frontMatter:{}},l=[],p={toc:l},s="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/1a55fec0.337313a1.js b/assets/js/1a55fec0.337313a1.js deleted file mode 100644 index 9e5b40b7..00000000 --- a/assets/js/1a55fec0.337313a1.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9779],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),m=c(n),u=r,v=m["".concat(s,".").concat(u)]||m[u]||d[u]||l;return n?a.createElement(v,i(i({ref:t},p),{},{components:n})):a.createElement(v,i({ref:t},p))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,i=new Array(l);i[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:r,i[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const l={title:"Variant IDs"},i=void 0,o={unversionedId:"core-functionality/variant-ids",id:"version-3.18/core-functionality/variant-ids",title:"Variant IDs",description:"Overview",source:"@site/versioned_docs/version-3.18/core-functionality/variant-ids.md",sourceDirName:"core-functionality",slug:"/core-functionality/variant-ids",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/variant-ids",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/core-functionality/variant-ids.md",tags:[],version:"3.18",frontMatter:{title:"Variant IDs"},sidebar:"docs",previous:{title:"MNV Recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/mnv-recomposition"},next:{title:"Jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/utilities/jasix"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF Examples",id:"vcf-examples",children:[],level:3},{value:"Format",id:"format",children:[],level:3},{value:"VID Examples",id:"vid-examples",children:[],level:3}],level:2},{value:"Translocation Breakends",id:"translocation-breakends",children:[{value:"VCF 
Example",id:"vcf-example",children:[],level:3},{value:"Format",id:"format-1",children:[],level:3},{value:"VID Example",id:"vid-example",children:[],level:3}],level:2},{value:"All Other Structural Variants",id:"all-other-structural-variants",children:[{value:"VCF Examples",id:"vcf-examples-1",children:[],level:3},{value:"Format",id:"format-2",children:[],level:3},{value:"VID Examples",id:"vid-examples-1",children:[],level:3}],level:2}],c={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Many downstream tools use a variant identifier to store annotation results. We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute."),(0,r.kt)("p",null,"The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. Our VID scheme attempts to fill that gap."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Conventions")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("ul",{parentName:"div"},(0,r.kt)("li",{parentName:"ul"},"all chromosomes use Ensembl style notation (i.e. 
22 instead of chr22)"),(0,r.kt)("li",{parentName:"ul"},"for a reference variant (i.e. no alt allele), replace the period (.) with the reference base"),(0,r.kt)("li",{parentName:"ul"},"padding bases are used, neither the reference nor alternate allele can be empty"),(0,r.kt)("li",{parentName:"ul"},"some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base")))),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-examples"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 66507 . T A 184.45 PASS .\nchr1 66521 . T TATATA 144.53 PASS .\nchr1 66572 . GTA G,GTACTATATATTATA 45.45 PASS .\n")),(0,r.kt)("h3",{id:"format"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-examples"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-66507-T-A"),(0,r.kt)("li",{parentName:"ul"},"1-66521-T-TATATA"),(0,r.kt)("li",{parentName:"ul"},"1-66572-GTA-G"),(0,r.kt)("li",{parentName:"ul"},"1-66572-G-GTACTATATATTA")),(0,r.kt)("h2",{id:"translocation-breakends"},"Translocation Breakends"),(0,r.kt)("h3",{id:"vcf-example"},"VCF Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 2617277 . A AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[ . 
PASS SVTYPE=BND\n")),(0,r.kt)("h3",{id:"format-1"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-example"},"VID Example"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[")),(0,r.kt)("h2",{id:"all-other-structural-variants"},"All Other Structural Variants"),(0,r.kt)("h3",{id:"vcf-examples-1"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 1000 . G . PASS END=3001000;SVTYPE=ROH\nchr1 1350082 . G . PASS END=1351320;SVTYPE=DEL\nchr1 1477854 . C . PASS END=1477984;SVTYPE=DUP\nchr1 1477968 . T . PASS END=1477968;SVTYPE=INS\nchr1 1715898 . N . PASS SVTYPE=CNV;END=1750149\nchr1 2650426 . N . PASS SVTYPE=CNV;END=2653074\nchr2 321682 . T . PASS SVTYPE=INV;END=421681\nchr20 2633403 . G . 
PASS END=2633421\n")),(0,r.kt)("h3",{id:"format-2"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"end position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"SVTYPE")),(0,r.kt)("h3",{id:"vid-examples-1"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-1000-3001000-G-","<","ROH",">","-ROH"),(0,r.kt)("li",{parentName:"ul"},"1-1350082-1351320-G-","<","DEL",">","-DEL"),(0,r.kt)("li",{parentName:"ul"},"1-1477854-1477984-C-","<","DUP:TANDEM",">","-DUP"),(0,r.kt)("li",{parentName:"ul"},"1-1477968-1477968-T-","<","INS",">","-INS"),(0,r.kt)("li",{parentName:"ul"},"1-1715898-1750149-A-","<","DUP",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(replace the N with A)")),(0,r.kt)("li",{parentName:"ul"},"1-2650426-2653074-N-","<","DEL",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(keep the N)")),(0,r.kt)("li",{parentName:"ul"},"2-321682-421681-T-","<","INV",">","-INV"),(0,r.kt)("li",{parentName:"ul"},"20-2633403-2633421-G-","<","STR2",">","-STR")))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/1b8bda22.cf4a4af8.js b/assets/js/1b8bda22.cf4a4af8.js deleted file mode 100644 index 661b8bb9..00000000 --- a/assets/js/1b8bda22.cf4a4af8.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5636],{3905:(t,e,a)=>{a.d(e,{Zo:()=>p,kt:()=>g});var n=a(67294);function l(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function r(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return 
Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(l[a]=t[a]);return l}(t,e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(l[a]=t[a])}return l}var s=n.createContext({}),m=function(t){var e=n.useContext(s),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},p=function(t){var e=m(t.components);return n.createElement(s.Provider,{value:e},t.children)},d="mdxType",k={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,l=t.mdxType,r=t.originalType,s=t.parentName,p=o(t,["components","mdxType","originalType","parentName"]),d=m(a),N=l,g=d["".concat(s,".").concat(N)]||d[N]||k[N]||r;return a?n.createElement(g,i(i({ref:e},p),{},{components:a})):n.createElement(g,i({ref:e},p))}));function g(t,e){var a=arguments,l=e&&e.mdxType;if("string"==typeof t||l){var r=a.length,i=new Array(r);i[0]=N;var o={};for(var s in e)hasOwnProperty.call(e,s)&&(o[s]=e[s]);o.originalType=t,o[d]="string"==typeof t?t:l,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var n=a(87462),l=(a(67294),a(3905));const r={title:"Custom Annotations"},i=void 0,o={unversionedId:"file-formats/custom-annotations",id:"version-3.17/file-formats/custom-annotations",title:"Custom Annotations",description:"Overview",source:"@site/versioned_docs/version-3.17/file-formats/custom-annotations.md",sourceDirName:"file-formats",slug:"/file-formats/custom-annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/file-formats/custom-annotations",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/file-formats/custom-annotations.md",tags:[],version:"3.17",frontMatter:{title:"Custom Annotations"},sidebar:"version-3.17/docs",previous:{title:"Nirvana 
JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/file-formats/nirvana-json-file-format"},next:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/canonical-transcripts"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Variant File Format",id:"variant-file-format",children:[{value:"Basic Allele Frequency Example",id:"basic-allele-frequency-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv",children:[],level:4},{value:"Convert to Nirvana Format",id:"convert-to-nirvana-format",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results",children:[],level:4}],level:3},{value:"Categories & Descriptions Example",id:"categories--descriptions-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-1",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-1",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-1",children:[],level:4},{value:"Using Positional Matches",id:"using-positional-matches",children:[],level:4}],level:3},{value:"Genomic Region Example",id:"genomic-region-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-2",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-2",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-2",children:[],level:4}],level:3},{value:"Mixing Small Variants and Genomic Regions",id:"mixing-small-variants-and-genomic-regions",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-3",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-3",children:[],level:4},{value:"Investigate the 
Results",id:"investigate-the-results-3",children:[],level:4}],level:3}],level:2},{value:"Gene File Format",id:"gene-file-format",children:[{value:"Basic Gene Example",id:"basic-gene-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-4",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-4",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-4",children:[],level:4}],level:3}],level:2},{value:"Customizing the Header",id:"customizing-the-header",children:[{value:"Title",id:"title",children:[],level:3},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:3},{value:"Matching Criteria",id:"matching-criteria",children:[],level:3},{value:"Categories",id:"categories",children:[],level:3},{value:"Descriptions",id:"descriptions",children:[{value:"Populations",id:"populations",children:[],level:4}],level:3},{value:"Data Types",id:"data-types",children:[],level:3}],level:2},{value:"Using SAUtils",id:"using-sautils",children:[{value:"Convert Variant File",id:"convert-variant-file",children:[],level:3},{value:"Convert Gene File",id:"convert-gene-file",children:[],level:3}],level:2}],m={toc:s},p="wrapper";function d(t){let{components:e,...a}=t;return(0,l.kt)(p,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"While the team tries to keep data sources up-to-date, you might want to start incorporate new annotations ahead of our update cycle. Another\ncommon use case involves protected health information (PHI). Custom annotations are a mechanism that enables both use cases."),(0,l.kt)("p",null,"Here are some examples of how our collaborators use custom annotations:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"associating context from both a patient-level and a patient cohort level with the variant annotations"),(0,l.kt)("li",{parentName:"ul"},"adding content that is licensed (e.g. 
HGMD) to the variant annotations")),(0,l.kt)("p",null,"At the moment, we have two different custom annotation file formats. One provides additional annotations to variants (both small variants and SVs)\nwhile the other caters to gene annotations."),(0,l.kt)("p",null,"In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data."),(0,l.kt)("p",null,"The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how\nNirvana should match the variants."),(0,l.kt)("p",null,"At Illumina, there are usually many components downstream of Nirvana that have to parse our annotations. If a customer provides a custom\nannotation, those downstream tools need to understand more about the data such as:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"data type (e.g. number, boolean, or a string)"),(0,l.kt)("li",{parentName:"ul"},"data category (e.g. is this an allele count, allele number, allele frequency, etc.)"),(0,l.kt)("li",{parentName:"ul"},"associated population (i.e. if this is an allele frequency)")),(0,l.kt)("p",null,"For each custom annotation, Nirvana uses this context to create a ",(0,l.kt)("a",{parentName:"p",href:"https://json-schema.org/"},"JSON schema")," that can be sent to downstream tools. If\na tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of ","[0, 1]","."),(0,l.kt)("h2",{id:"variant-file-format"},"Variant File Format"),(0,l.kt)("h3",{id:"basic-allele-frequency-example"},"Basic Allele Frequency Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Imagine that you want to create a basic allele frequency custom annotation for small variants. 
If we visualized the tab-delimited file\n(TSV), it would look something like this:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency")),(0,l.kt)("tr",{parentName:"tbody"}
,(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over the header and discuss the contents:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"title")," indicates the name of the JSON key"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"assembly")," indicates that this data is only valid for 
",(0,l.kt)("inlineCode",{parentName:"li"},"GRCh38")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"matchVariantsBy")," indicates that we should only match the annotations if they are allele-specific"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"categories")," provides hints to downstream tools on how they might want to treat the data. In this case, we indicate that it's an allele\nfrequency."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"descriptions")," are used in special circumstances to provide more context. Even though column 5 is called ",(0,l.kt)("inlineCode",{parentName:"li"},"allAf"),", it might not be clear to a\ndownstream tool that this means a global allele frequency using all sub-populations. In this case, ",(0,l.kt)("inlineCode",{parentName:"li"},"ALL")," indicates the intended population."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"type")," indicates to downstream tools the data type. 
Since allele frequencies are numbers, we'll write ",(0,l.kt)("inlineCode",{parentName:"li"},"number")," in this column.")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Reference Base Checking")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Nirvana validates all the reference bases in a custom annotation. If a variant or genomic region is specified that has the wrong reference base, an error will be produced."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"The variants within each chromosome must be sorted by genomic position."))),(0,l.kt)("h4",{id:"convert-to-nirvana-format"},"Convert to Nirvana 
Format"),(0,l.kt)("p",null,"First we need to convert the TSV file to Nirvana's native file format and let's put that file in a new directory called CA:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"$ mkdir CA\n$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA\n---------------------------------------------------------------------------\nSAUtils (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nChromosome 16 completed in 00:00:00.1\nChromosome 19 completed in 00:00:00.0\n\nTime: 00:00:00.2\n")),(0,l.kt)("h4",{id:"annotate-with-nirvana"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's annotate the following VCF (notice that it's one of the variants that we have in our custom annotation):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 68801894 . G A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,"Since Nirvana can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to\nthe normal Nirvana command-line."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash",metastring:"{3}","{3}":!0},"$ dotnet bin/Release/netcoreapp2.1/Nirvana.dll -c Data/Cache/GRCh38/Both \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \\\n --sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA\n---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.8\nSA Position Scan 00:00:00.0 19\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr16 00:00:00.2 00:00:01.3 1\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:01.9 25.5 %\nPreload 00:00:00.2 3.3 %\nAnnotation 00:00:01.3 18.2 %\n\nTime: 00:00:06.3\n")),(0,l.kt)("h4",{id:"investigate-the-results"},"Investigate the Results"),(0,l.kt)("p",null,"We would expect the following data to show up in our JSON output file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-16}","{12-16}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n 
"altAllele": "A",\n "allAf": 7e-06\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"Nirvana preserves up to 6 decimal places for allele frequency data."),(0,l.kt)("h3",{id:"categories--descriptions-example"},"Categories & Descriptions Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-1"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Building on the previous example, we can add other types of annotations like predictions and general notes."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 6"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 
7"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,l.kt)("td",{parentName:"tr",align:"left"},"pathogenicity"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr"
,align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579"),(0,l.kt)("td",{parentName:"tr",align:"left"},"P"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569"),(0,l.kt)("td",{parentName:"tr",align:"left"},"LP"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in case 
123")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource2.tsv"},"the full TSV file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Placeholders")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). 
While\nNirvana also accepts empty columns in the TSV file, we use them in these examples to promote readability."))),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 6")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"pathogenicity")," which uses the ",(0,l.kt)("inlineCode",{parentName:"li"},"Prediction")," category. When using this category, Nirvana will\nvalidate to make\nsure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic)."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 7")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes")," and it doesn't have a category or description. We're just going to use it to add some internal\nnotes.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-1"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the\nalternate allele (allele-specific match):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 23603511 . TG T . . .\n16 68801894 . G A . . .\n19 11107436 . G C . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA2.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-1"},"Investigate the Results"),(0,l.kt)("p",null,"Because we specified ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," in our custom annotation file, only the middle variant will get an annotation:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-18}","{12-18}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123"\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA2.json.gz"},"the full JSON file"),"."),(0,l.kt)("h4",{id:"using-positional-matches"},"Using Positional Matches"),(0,l.kt)("p",null,"What would happen if we changed to ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position"),"? Two things will happen. 
First, our positional variants will now match:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-17}","{12-17}":!0},' "variants": [\n {\n "vid": "16-23603511-TG-T",\n "chromosome": "16",\n "begin": 23603512,\n "end": 23603512,\n "refAllele": "G",\n "altAllele": "-",\n "variantType": "deletion",\n "hgvsg": "NC_000016.10:g.23603512delG",\n "MyDataSource": [\n {\n "refAllele": "GA",\n "altAllele": "-",\n "allAf": 7e-06,\n "pathogenicity": "P"\n }\n ],\n "clinvar": [\n')),(0,l.kt)("p",null,"In addition, you will now see an extra flag for our allele-specific variant:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-20}","{12-20}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": [\n {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123",\n "isAlleleSpecific": true\n }\n ],\n "clinvar": [\n')),(0,l.kt)("h3",{id:"genomic-region-example"},"Genomic Region Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-2"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. 
Consider the following example:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0
,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"20000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"70000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Lots of false positives in this region")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource3.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes"),". In essence, it looks exactly like column 7 from our previous example."),(0,l.kt)("li",{parentName:"ul"},"The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.")),(0,l.kt)("p",null,"In the previous example we learned about positional matching vs allele-specific matching. 
For genomic regions, ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position")," produce\nthe same result."),(0,l.kt)("h4",{id:"annotate-with-nirvana-2"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use the same VCF file as our previous example."),(0,l.kt)("h4",{id:"investigate-the-results-2"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 23603511,\n "refAllele": "TG",\n "altAlleles": [\n "T"\n ],\n "cytogeneticBand": "16p12.2",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA3.json.gz"},"the full JSON file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Reciprocal & Annotation 
Overlap")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For all intervals, Nirvana internally calculates two overlaps: a ",(0,l.kt)("strong",{parentName:"p"},"variant overlap")," and an ",(0,l.kt)("strong",{parentName:"p"},"annotation overlap"),". Variant overlap is the percentage of the variant's length that is\noverlapped. Annotation overlap is the percentage of the annotation's length that is overlap. "),(0,l.kt)("p",{parentName:"div"},(0,l.kt)("strong",{parentName:"p"},"Reciprocal overlap")," is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is one 1 bp, both overlaps will be pretty close to 0."))),(0,l.kt)("p",null,"We will also see this annotation for the other variant on chr16:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 68801894,\n "refAllele": "G",\n "altAlleles": [\n "A"\n ],\n "cytogeneticBand": "16q22.1",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 
2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Targeting Structural Variants")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To\nforce Nirvana to match regions only to other SVs, use the ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=sv")," option in the header."))),(0,l.kt)("h3",{id:"mixing-small-variants-and-genomic-regions"},"Mixing Small Variants and Genomic Regions"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-3"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions. Let's create a file that contains both:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 
6"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt
)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval 
#1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"<","DEL",">"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval #2")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr22"),(0,l.kt)("td",{parentName:"tr",align:"left"},"12370388"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T[chr22:12370729["),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"Known false-positive")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource4.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 4")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"REF")," field. Exception for the case listed below, this is only used by small variants or translocation breakends."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"END")," field. This is only used by genomic regions."),(0,l.kt)("li",{parentName:"ul"},"There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? Interval #2 has ",(0,l.kt)("strong",{parentName:"li"},"a symbolic allele in the ALT column"),". When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). 
When Nirvana matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-3"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file to study how matching works for intervals #1 and #2:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n21 10510818 . C . . END=10699435;SVTYPE=DUP\n22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA3.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,'The first variant is similar to the custom annotation labelled "interval #2". Position 10510818 is the padding base, so it effectively starts at position 10510819.'),(0,l.kt)("h4",{id:"investigate-the-results-3"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-26}","{11-26}":!0},' "positions": [\n {\n "chromosome": "21",\n "position": 10510818,\n "svEnd": 10699435,\n "refAllele": "C",\n "altAlleles": [\n ""\n ],\n "cytogeneticBand": "21p11.2",\n "MyDataSource": [\n {\n "start": 10510818,\n "end": 10699435,\n "notes": "Interval #1",\n "reciprocalOverlap": 0.99999,\n "annotationOverlap": 0.99999\n },\n {\n "start": 10510819,\n "end": 10699435,\n "notes": "Interval #2",\n "reciprocalOverlap": 1,\n "annotationOverlap": 1\n }\n ],\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA4.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. 
Interval #1 technically starts 1 bp earlier, so its overlap 99.9%."),(0,l.kt)("p",null,"Further down the JSON file, we find the annotated translocation breakend:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-15}","{11-15}":!0},' "variants": [\n {\n "vid": "22-12370388-T-T[chr22:12370729[",\n "chromosome": "22",\n "begin": 12370388,\n "end": 12370388,\n "isStructuralVariant": true,\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "variantType": "translocation_breakend",\n "MyDataSource": {\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "notes": "Known false-positive"\n }\n }\n')),(0,l.kt)("h2",{id:"gene-file-format"},"Gene File Format"),(0,l.kt)("h3",{id:"basic-gene-example"},"Basic Gene Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-4"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions, however, sometimes we would like to add custom gene annotations. 
The gene custom annotation file format\nlooks slightly different:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#geneSymbol"),(0,l.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,l.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TP53"),(0,l.kt)("td",{parentName:"tr",align:"left"},"7157"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colorectal cancer, hereditary nonpolyposis, type 
5"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KRAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ENSG00000133703"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mismatch repair cancer syndrome"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in cohort 123")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource5.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 2")," has the ",(0,l.kt)("inlineCode",{parentName:"li"},"geneId")," field. This can be either an ",(0,l.kt)("strong",{parentName:"li"},"Entrez Gene ID")," or an ",(0,l.kt)("strong",{parentName:"li"},"Ensembl ID"),".")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Gene Symbols")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Nirvana uses the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneId")," to match genes rather than the gene symbol. 
However, to\nmake the custom annotation files easier to read, we've included the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneSymbol")," column as well."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unknown Gene IDs")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"When Nirvana parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Nirvana. In such a case, Nirvana will display an error showing all the\nunrecognized gene IDs."))),(0,l.kt)("h4",{id:"annotate-with-nirvana-4"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a VCF file that contain variants in TP53 and KRAS:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n12 25227255 . A T . . .\n17 7675074 . C A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA4.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-4"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{24-27}","{24-27}":!0},' "genes": [\n {\n "name": "KRAS",\n "clingenGeneValidity": [\n {\n "diseaseId": "MONDO_0009026",\n "disease": "Costello syndrome",\n "classification": "disputed",\n "classificationDate": "2018-07-24"\n }\n ],\n "clingenDosageSensitivityMap": {\n "haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"\n },\n "gnomAD": {\n "pLi": 0.000788,\n "pRec": 0.789,\n "pNull": 0.21,\n "synZ": 0.336,\n "misZ": 2.32,\n "loeuf": 1.24\n },\n "MyDataSource": {\n "phenotype": "Mismatch repair cancer syndrome",\n "notes": "Seen in cohort 123"\n }\n },\n')),(0,l.kt)("p",null,"This is the abbreviated output for KRAS. Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA5.json.gz"},"the full JSON file")," if you want to see the complete KRAS entry."),(0,l.kt)("h2",{id:"customizing-the-header"},"Customizing the Header"),(0,l.kt)("h3",{id:"title"},"Title"),(0,l.kt)("p",null,"For the title, you can provide any string that hasn't already been used. 
The title should be unique."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Make sure that the title does not conflict with other keys in the JSON file."))),(0,l.kt)("p",null,"For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"vid"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"transcripts"),", etc.. The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clinvar")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"gnomad"),"."),(0,l.kt)("p",null,"For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"svLength"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"cytogeneticBand"),", etc. 
The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clingen")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"dgv"),"."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Care should be taken not to annotate using multiple custom annotations that all use the same title."))),(0,l.kt)("h3",{id:"genome-assemblies"},"Genome Assemblies"),(0,l.kt)("p",null,"The following genome assemblies can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"GRCh37"),(0,l.kt)("li",{parentName:"ul"},"GRCh38")),(0,l.kt)("h3",{id:"matching-criteria"},"Matching Criteria"),(0,l.kt)("p",null,"The matching criteria instructs how Nirvana should match a VCF variant to the custom annotation."),(0,l.kt)("p",null,"The following matching criteria can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"allele")," - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"gnomAD")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"position")," - use this when you want positional matches. 
This is commonly used with disease phenotype data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"ClinVar")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"sv")," - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline\ncopy number intervals along the genome.")),(0,l.kt)("h3",{id:"categories"},"Categories"),(0,l.kt)("p",null,"Categories are not used by Nirvana, but are often used by downstream tools. Categories provide hints for how those tools should filter or display\nthe annotation data."),(0,l.kt)("p",null,"When a category is specified, Nirvana will provide additional validation for those fields. The following table describes each category:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Category"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Validation"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele counts for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleNumber"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele numbers for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele frequencies for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations 
below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ACMG-style pathogenicity classifications"),(0,l.kt)("td",{parentName:"tr",align:"left"},"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"benign")," (B)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely benign")," (LB)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"VUS"),(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely pathogenic")," (LP)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"pathogenic")," (P)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free text that signals downstream tools to add the column to the filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 20 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free-text description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 100 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Identifier"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any ID"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 50 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"HomozygousCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"count of homozygous individuals for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Score"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any score value"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Any double-precision floating point 
number")))),(0,l.kt)("h3",{id:"descriptions"},"Descriptions"),(0,l.kt)("p",null,"Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations."),(0,l.kt)("h4",{id:"populations"},"Populations"),(0,l.kt)("p",null,"The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Super-population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ACB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African Caribbeans in Barbados")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"All populations")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ad Mixed American")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASJ"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ashkenazi Jewish")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASW"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Americans of 
African Ancestry in SW USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"BEB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Bengali from Bangladesh")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CDX"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Chinese Dai in Xishuangbanna, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CEU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Utah Residents (CEPH) with Northern and Western European Ancestry")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Han Chinese in Beijing, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Southern Han Chinese")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CLM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colombians from Medellin, Colombia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"East Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ESN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Esan in 
Nigeria")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"FIN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Finnish in Finland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GBR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"British in England and Scotland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GIH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gujarati Indian from Houston, Texas")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GWD"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gambian in Western Divisions in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"IBS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Iberian population in Spain")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ITU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Indian Telugu from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"JPT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Japanese in Tokyo, Japan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KHV"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Kinh in Ho Chi Minh City, 
Vietnam")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"LWK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Luhya in Webuye, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MAG"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mandinka in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MKK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Maasai in Kinyawa, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MSL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mende in Sierra Leone")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MXL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mexican Ancestry from Los Angeles, USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"NFE"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European (Non-Finnish)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Other")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PEL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Peruvians from Lima, Peru")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PJL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Punjabi from Lahore, 
Pakistan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Puerto Ricans from Puerto Rico")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"South Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"STU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Sri Lankan Tamil from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TSI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Toscani in Italia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"YRI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Yoruba in Ibadan, Nigeria")))),(0,l.kt)("h3",{id:"data-types"},"Data Types"),(0,l.kt)("p",null,"Each custom annotation can be one of the following data types:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"bool")," - true or false"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"number")," - any integer or floating-point number"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"string")," - text")),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 
1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For boolean variables, only keys with a ",(0,l.kt)("inlineCode",{parentName:"p"},"true")," value will be output to the JSON object."))),(0,l.kt)("h2",{id:"using-sautils"},"Using SAUtils"),(0,l.kt)("p",null,"Nirvana includes a tool called ",(0,l.kt)("inlineCode",{parentName:"p"},"SAUtils")," that converts various data sources into Nirvana's native binary format. The sub-commands ",(0,l.kt)("inlineCode",{parentName:"p"},"customvar")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"customgene")," are used to specify a variant file or a gene file respectively."),(0,l.kt)("h3",{id:"convert-variant-file"},"Convert Variant File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,l.kt)("h3",{id:"convert-gene-file"},"Convert Gene File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll 
customgene \\\n --uga Nirvana_UGA.tsv \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"--uga")," argument specifies the Nirvana universal gene archive (UGA) path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/1be78505.067a63d0.js b/assets/js/1be78505.067a63d0.js new file mode 100644 index 00000000..896a2292 --- /dev/null +++ b/assets/js/1be78505.067a63d0.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9514,4608],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>b});var a=n(7294);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var i=a.createContext({}),s=function(e){var t=a.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},d=function(e){var t=s(e.components);return a.createElement(i.Provider,{value:t},e.children)},m="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},p=a.forwardRef((function(e,t){var 
n=e.components,o=e.mdxType,r=e.originalType,i=e.parentName,d=c(e,["components","mdxType","originalType","parentName"]),m=s(n),p=o,b=m["".concat(i,".").concat(p)]||m[p]||u[p]||r;return n?a.createElement(b,l(l({ref:t},d),{},{components:n})):a.createElement(b,l({ref:t},d))}));function b(e,t){var n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var r=n.length,l=new Array(r);l[0]=p;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[m]="string"==typeof e?e:o,l[1]=c;for(var s=2;s{n.r(t),n.d(t,{default:()=>me});var a=n(7294),o=n(3905),r=n(6291),l=n(8882),c=n(6010),i=n(3810),s=n(3783),d=n(5537),m=n(7462);const u=function(e){return a.createElement("svg",(0,m.Z)({width:"20",height:"20","aria-hidden":"true"},e),a.createElement("g",{fill:"#7a7a7a"},a.createElement("path",{d:"M9.992 10.023c0 .2-.062.399-.172.547l-4.996 7.492a.982.982 0 01-.828.454H1c-.55 0-1-.453-1-1 0-.2.059-.403.168-.551l4.629-6.942L.168 3.078A.939.939 0 010 2.528c0-.548.45-.997 1-.997h2.996c.352 0 .649.18.828.45L9.82 9.472c.11.148.172.347.172.55zm0 0"}),a.createElement("path",{d:"M19.98 10.023c0 .2-.058.399-.168.547l-4.996 7.492a.987.987 0 01-.828.454h-3c-.547 0-.996-.453-.996-1 0-.2.059-.403.168-.551l4.625-6.942-4.625-6.945a.939.939 0 01-.168-.55 1 1 0 01.996-.997h3c.348 0 .649.18.828.45l4.996 7.492c.11.148.168.347.168.55zm0 0"})))};var p=n(5999),b=n(9960),h=n(3919),y=n(541);const g={menuLinkText:"menuLinkText_OKON",hasHref:"hasHref_TwRn"};var f=n(2389);const v=(0,a.memo)((e=>{let{items:t,...n}=e;return a.createElement(a.Fragment,null,t.map(((e,t)=>a.createElement(k,(0,m.Z)({key:t,item:e},n)))))}));function k(e){let{item:t,...n}=e;return"category"===t.type?0===t.items.length?null:a.createElement(E,(0,m.Z)({item:t},n)):a.createElement(T,(0,m.Z)({item:t},n))}function E(e){let{item:t,onItemClick:n,activePath:o,level:r,...l}=e;const{items:s,label:d,collapsible:u,className:h,href:y}=t,k=function(e){const 
t=(0,f.Z)();return(0,a.useMemo)((()=>e.href?e.href:!t&&e.collapsible?(0,i.Wl)(e):void 0),[e,t])}(t),E=(0,i._F)(t,o),{collapsed:T,setCollapsed:N,toggleCollapsed:C}=(0,i.uR)({initialState:()=>!!u&&(!E&&t.collapsed)});return function(e){let{isActive:t,collapsed:n,setCollapsed:o}=e;const r=(0,i.D9)(t);(0,a.useEffect)((()=>{t&&!r&&n&&o(!1)}),[t,r,n,o])}({isActive:E,collapsed:T,setCollapsed:N}),a.createElement("li",{className:(0,c.Z)(i.kM.docs.docSidebarItemCategory,i.kM.docs.docSidebarItemCategoryLevel(r),"menu__list-item",{"menu__list-item--collapsed":T},h)},a.createElement("div",{className:"menu__list-item-collapsible"},a.createElement(b.Z,(0,m.Z)({className:(0,c.Z)("menu__link",{"menu__link--sublist":u&&!y,"menu__link--active":E,[g.menuLinkText]:!u,[g.hasHref]:!!k}),onClick:u?e=>{n?.(t),y?N(!1):(e.preventDefault(),C())}:()=>{n?.(t)},href:u?k??"#":k},l),d),y&&u&&a.createElement("button",{"aria-label":(0,p.I)({id:"theme.DocSidebarItem.toggleCollapsedCategoryAriaLabel",message:"Toggle the collapsible sidebar category '{label}'",description:"The ARIA label to toggle the collapsible sidebar category"},{label:d}),type:"button",className:"clean-btn menu__caret",onClick:e=>{e.preventDefault(),C()}})),a.createElement(i.zF,{lazy:!0,as:"ul",className:"menu__list",collapsed:T},a.createElement(v,{items:s,tabIndex:T?-1:0,onItemClick:n,activePath:o,level:r+1})))}function T(e){let{item:t,onItemClick:n,activePath:o,level:r,...l}=e;const{href:s,label:d,className:u}=t,p=(0,i._F)(t,o);return a.createElement("li",{className:(0,c.Z)(i.kM.docs.docSidebarItemLink,i.kM.docs.docSidebarItemLinkLevel(r),"menu__list-item",u),key:d},a.createElement(b.Z,(0,m.Z)({className:(0,c.Z)("menu__link",{"menu__link--active":p}),"aria-current":p?"page":void 0,to:s},(0,h.Z)(s)&&{onClick:n?()=>n(t):void 0},l),(0,h.Z)(s)?d:a.createElement("span",null,d,a.createElement(y.Z,null))))}const 
N={sidebar:"sidebar_a3j0",sidebarWithHideableNavbar:"sidebarWithHideableNavbar_VlPv",sidebarHidden:"sidebarHidden_OqfG",sidebarLogo:"sidebarLogo_hmkv",menu:"menu_cyFh",menuWithAnnouncementBar:"menuWithAnnouncementBar_+O1J",collapseSidebarButton:"collapseSidebarButton_eoK2",collapseSidebarButtonIcon:"collapseSidebarButtonIcon_e+kA",sidebarMenuIcon:"sidebarMenuIcon_iZzd",sidebarMenuCloseIcon:"sidebarMenuCloseIcon_6kU2"};function C(e){let{onClick:t}=e;return a.createElement("button",{type:"button",title:(0,p.I)({id:"theme.docs.sidebar.collapseButtonTitle",message:"Collapse sidebar",description:"The title attribute for collapse button of doc sidebar"}),"aria-label":(0,p.I)({id:"theme.docs.sidebar.collapseButtonAriaLabel",message:"Collapse sidebar",description:"The title attribute for collapse button of doc sidebar"}),className:(0,c.Z)("button button--secondary button--outline",N.collapseSidebarButton),onClick:t},a.createElement(u,{className:N.collapseSidebarButtonIcon}))}function _(e){let{path:t,sidebar:n,onCollapse:o,isHidden:r}=e;const l=function(){const{isActive:e}=(0,i.nT)(),[t,n]=(0,a.useState)(e);return(0,i.RF)((t=>{let{scrollY:a}=t;e&&n(0===a)}),[e]),e&&t}(),{navbar:{hideOnScroll:s},hideableSidebar:m}=(0,i.LU)();return a.createElement("div",{className:(0,c.Z)(N.sidebar,{[N.sidebarWithHideableNavbar]:s,[N.sidebarHidden]:r})},s&&a.createElement(d.Z,{tabIndex:-1,className:N.sidebarLogo}),a.createElement("nav",{className:(0,c.Z)("menu thin-scrollbar",N.menu,{[N.menuWithAnnouncementBar]:l})},a.createElement("ul",{className:(0,c.Z)(i.kM.docs.docSidebarMenu,"menu__list")},a.createElement(v,{items:n,activePath:t,level:1}))),m&&a.createElement(C,{onClick:o}))}const Z=e=>{let{toggleSidebar:t,sidebar:n,path:o}=e;return a.createElement("ul",{className:(0,c.Z)(i.kM.docs.docSidebarMenu,"menu__list")},a.createElement(v,{items:n,activePath:o,onItemClick:e=>{"category"===e.type&&e.href&&t(),"link"===e.type&&t()},level:1}))};function S(e){return 
a.createElement(i.Cv,{component:Z,props:e})}const O=a.memo(_),I=a.memo(S);function w(e){const t=(0,s.Z)(),n="desktop"===t||"ssr"===t,o="mobile"===t;return a.createElement(a.Fragment,null,n&&a.createElement(O,e),o&&a.createElement(I,e))}var P=n(2859);const x={plain:{backgroundColor:"#2a2734",color:"#9a86fd"},styles:[{types:["comment","prolog","doctype","cdata","punctuation"],style:{color:"#6c6783"}},{types:["namespace"],style:{opacity:.7}},{types:["tag","operator","number"],style:{color:"#e09142"}},{types:["property","function"],style:{color:"#9a86fd"}},{types:["tag-id","selector","atrule-id"],style:{color:"#eeebff"}},{types:["attr-name"],style:{color:"#c4b9fe"}},{types:["boolean","string","entity","url","attr-value","keyword","control","directive","unit","statement","regex","atrule","placeholder","variable"],style:{color:"#ffcc99"}},{types:["deleted"],style:{textDecorationLine:"line-through"}},{types:["inserted"],style:{textDecorationLine:"underline"}},{types:["italic"],style:{fontStyle:"italic"}},{types:["important","bold"],style:{fontWeight:"bold"}},{types:["important"],style:{color:"#c4b9fe"}}]};var L={Prism:n(7410).default,theme:x};function B(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function A(){return A=Object.assign||function(e){for(var t=1;t0&&e[n-1]===t?e:e.concat(t)};function F(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&-1===t.indexOf(a)&&(n[a]=e[a]);return n}var R=function(e){function t(){for(var t=this,n=[],a=arguments.length;a--;)n[a]=arguments[a];e.apply(this,n),B(this,"getThemeDict",(function(e){if(void 0!==t.themeDict&&e.theme===t.prevTheme&&e.language===t.prevLanguage)return t.themeDict;t.prevTheme=e.theme,t.prevLanguage=e.language;var n=e.theme?function(e,t){var n=e.plain,a=Object.create(null),o=e.styles.reduce((function(e,n){var a=n.languages,o=n.style;return a&&!a.includes(t)||n.types.forEach((function(t){var n=A({},e[t],o);e[t]=n})),e}),a);return 
o.root=n,o.plain=A({},n,{backgroundColor:null}),o}(e.theme,e.language):void 0;return t.themeDict=n})),B(this,"getLineProps",(function(e){var n=e.key,a=e.className,o=e.style,r=A({},F(e,["key","className","style","line"]),{className:"token-line",style:void 0,key:void 0}),l=t.getThemeDict(t.props);return void 0!==l&&(r.style=l.plain),void 0!==o&&(r.style=void 0!==r.style?A({},r.style,o):o),void 0!==n&&(r.key=n),a&&(r.className+=" "+a),r})),B(this,"getStyleForToken",(function(e){var n=e.types,a=e.empty,o=n.length,r=t.getThemeDict(t.props);if(void 0!==r){if(1===o&&"plain"===n[0])return a?{display:"inline-block"}:void 0;if(1===o&&!a)return r[n[0]];var l=a?{display:"inline-block"}:{},c=n.map((function(e){return r[e]}));return Object.assign.apply(Object,[l].concat(c))}})),B(this,"getTokenProps",(function(e){var n=e.key,a=e.className,o=e.style,r=e.token,l=A({},F(e,["key","className","style","token"]),{className:"token "+r.types.join(" "),children:r.content,style:t.getStyleForToken(r),key:void 0});return void 0!==o&&(l.style=void 0!==l.style?A({},l.style,o):o),void 0!==n&&(l.key=n),a&&(l.className+=" "+a),l})),B(this,"tokenize",(function(e,t,n,a){var o={code:t,grammar:n,language:a,tokens:[]};e.hooks.run("before-tokenize",o);var r=o.tokens=e.tokenize(o.code,o.grammar,o.language);return e.hooks.run("after-tokenize",o),r}))}return e&&(t.__proto__=e),t.prototype=Object.create(e&&e.prototype),t.prototype.constructor=t,t.prototype.render=function(){var e=this.props,t=e.Prism,n=e.language,a=e.code,o=e.children,r=this.getThemeDict(this.props),l=t.languages[n];return o({tokens:function(e){for(var t=[[]],n=[e],a=[0],o=[e.length],r=0,l=0,c=[],i=[c];l>-1;){for(;(r=a[l]++)0?d:["plain"],s=m):(d=j(d,m.type),m.alias&&(d=j(d,m.alias)),s=m.content),"string"==typeof s){var u=s.split(M),p=u.length;c.push({types:d,content:u[0]});for(var b=1;b{const{prism:e}=(0,i.LU)(),{isDarkTheme:t}=(0,z.Z)(),n=e.theme||W,a=e.darkTheme||n;return 
t?a:n},V="codeBlockContainer_J+bg",q="codeBlockContent_csEI",Y="codeBlockTitle_oQzk",J="codeBlock_rtdJ",K="copyButton_M3SB",Q="codeBlockLines_1zSZ";function X(e){let{children:t,className:n,metastring:o,title:r}=e;const{prism:l}=(0,i.LU)(),[s,d]=(0,a.useState)(!1),[u,b]=(0,a.useState)(!1);(0,a.useEffect)((()=>{b(!0)}),[]);const h=(0,i.bc)(o)||r,y=U(),g=Array.isArray(t)?t.join(""):t,f=(0,i.Vo)(n)??l.defaultLanguage,{highlightLines:v,code:k}=(0,i.nZ)(g,o,f),E=()=>{!function(e,t){let{target:n=document.body}=void 0===t?{}:t;if("string"!=typeof e)throw new TypeError(`Expected parameter \`text\` to be a \`string\`, got \`${typeof e}\`.`);const a=document.createElement("textarea"),o=document.activeElement;a.value=e,a.setAttribute("readonly",""),a.style.contain="strict",a.style.position="absolute",a.style.left="-9999px",a.style.fontSize="12pt";const r=document.getSelection(),l=r.rangeCount>0&&r.getRangeAt(0);n.append(a),a.select(),a.selectionStart=0,a.selectionEnd=e.length;let c=!1;try{c=document.execCommand("copy")}catch{}a.remove(),l&&(r.removeAllRanges(),r.addRange(l)),o&&o.focus()}(k),d(!0),setTimeout((()=>d(!1)),2e3)};return a.createElement(H,(0,m.Z)({},L,{key:String(u),theme:y,code:k,language:f}),(e=>{let{className:t,style:o,tokens:r,getLineProps:l,getTokenProps:i}=e;return a.createElement("div",{className:(0,c.Z)(V,n)},h&&a.createElement("div",{style:o,className:Y},h),a.createElement("div",{className:(0,c.Z)(q,f)},a.createElement("pre",{tabIndex:0,className:(0,c.Z)(t,J,"thin-scrollbar"),style:o},a.createElement("code",{className:Q},r.map(((e,t)=>{1===e.length&&"\n"===e[0].content&&(e[0].content="");const n=l({line:e,key:t});return v.includes(t)&&(n.className+=" docusaurus-highlight-code-line"),a.createElement("span",(0,m.Z)({key:t},n),e.map(((e,t)=>a.createElement("span",(0,m.Z)({key:t},i({token:e,key:t}))))),a.createElement("br",null))})))),a.createElement("button",{type:"button","aria-label":(0,p.I)({id:"theme.CodeBlock.copyButtonAriaLabel",message:"Copy code to 
clipboard",description:"The ARIA label for copy code blocks button"}),className:(0,c.Z)(K,"clean-btn"),onClick:E},s?a.createElement(p.Z,{id:"theme.CodeBlock.copied",description:"The copied button label on code blocks"},"Copied"):a.createElement(p.Z,{id:"theme.CodeBlock.copy",description:"The copy button label on code blocks"},"Copy"))))}))}var $=n(9649);const G="details_h+cY";function ee(e){let{...t}=e;return a.createElement(i.PO,(0,m.Z)({},t,{className:(0,c.Z)("alert alert--info",G,t.className)}))}const te={head:e=>{const t=a.Children.map(e.children,(e=>function(e){if(e?.props?.mdxType&&e?.props?.originalType){const{mdxType:t,originalType:n,...o}=e.props;return a.createElement(e.props.originalType,o)}return e}(e)));return a.createElement(P.Z,e,t)},code:e=>{const{children:t}=e;return(0,a.isValidElement)(t)?t:t.includes("\n")?a.createElement(X,e):a.createElement("code",e)},a:e=>a.createElement(b.Z,e),pre:e=>{const{children:t}=e;return(0,a.isValidElement)(t)&&(0,a.isValidElement)(t?.props?.children)?t.props.children:a.createElement(X,(0,a.isValidElement)(t)?t?.props:{...e})},details:e=>{const t=a.Children.toArray(e.children),n=t.find((e=>"summary"===e?.props?.mdxType)),o=a.createElement(a.Fragment,null,t.filter((e=>e!==n)));return a.createElement(ee,(0,m.Z)({},e,{summary:n}),o)},h1:(0,$.Z)("h1"),h2:(0,$.Z)("h2"),h3:(0,$.Z)("h3"),h4:(0,$.Z)("h4"),h5:(0,$.Z)("h5"),h6:(0,$.Z)("h6")};var ne=n(4608);const ae="backToTopButton_i9tI",oe="backToTopButtonShow_wCmF",re=!1;function le(){const e=(0,a.useRef)(null);return{smoothScrollTop:function(){e.current=re?(window.scrollTo({top:0,behavior:"smooth"}),()=>{}):function(){let e=null;return function t(){const n=document.documentElement.scrollTop;n>0&&(e=requestAnimationFrame(t),window.scrollTo(0,Math.floor(.85*n)))}(),()=>e&&cancelAnimationFrame(e)}()},cancelScrollToTop:()=>e.current?.()}}const 
ce=function(){const[e,t]=(0,a.useState)(!1),n=(0,a.useRef)(!1),{smoothScrollTop:o,cancelScrollToTop:r}=le();return(0,i.RF)(((e,a)=>{let{scrollY:o}=e;const l=a?.scrollY;if(!l)return;if(n.current)return void(n.current=!1);const c=o{e.location.hash&&(n.current=!0,t(!1))})),a.createElement("button",{"aria-label":(0,p.I)({id:"theme.BackToTopButton.buttonAriaLabel",message:"Scroll back to top",description:"The ARIA label for the back to top button"}),className:(0,c.Z)("clean-btn",i.kM.common.backToTopButton,ae,{[oe]:e}),type:"button",onClick:()=>o()})};var ie=n(6550);const se={docPage:"docPage_lDyR",docMainContainer:"docMainContainer_r8cw",docSidebarContainer:"docSidebarContainer_0YBq",docMainContainerEnhanced:"docMainContainerEnhanced_SOUu",docSidebarContainerHidden:"docSidebarContainerHidden_Qlt2",collapsedDocSidebar:"collapsedDocSidebar_zZpm",expandSidebarButtonIcon:"expandSidebarButtonIcon_cxi8",docItemWrapperEnhanced:"docItemWrapperEnhanced_aT5H"};function de(e){let{currentDocRoute:t,versionMetadata:n,children:r,sidebarName:s}=e;const d=(0,i.Vq)(),{pluginId:m,version:b}=n,[h,y]=(0,a.useState)(!1),[g,f]=(0,a.useState)(!1),v=(0,a.useCallback)((()=>{g&&f(!1),y((e=>!e))}),[g]);return a.createElement(l.Z,{wrapperClassName:i.kM.wrapper.docsPages,pageClassName:i.kM.page.docsDocPage,searchMetadata:{version:b,tag:(0,i.os)(m,b)}},a.createElement("div",{className:se.docPage},a.createElement(ce,null),d&&a.createElement("aside",{className:(0,c.Z)(se.docSidebarContainer,{[se.docSidebarContainerHidden]:h}),onTransitionEnd:e=>{e.currentTarget.classList.contains(se.docSidebarContainer)&&h&&f(!0)}},a.createElement(w,{key:s,sidebar:d,path:t.path,onCollapse:v,isHidden:g}),g&&a.createElement("div",{className:se.collapsedDocSidebar,title:(0,p.I)({id:"theme.docs.sidebar.expandButtonTitle",message:"Expand sidebar",description:"The ARIA label and title attribute for expand button of doc sidebar"}),"aria-label":(0,p.I)({id:"theme.docs.sidebar.expandButtonAriaLabel",message:"Expand 
sidebar",description:"The ARIA label and title attribute for expand button of doc sidebar"}),tabIndex:0,role:"button",onKeyDown:v,onClick:v},a.createElement(u,{className:se.expandSidebarButtonIcon}))),a.createElement("main",{className:(0,c.Z)(se.docMainContainer,{[se.docMainContainerEnhanced]:h||!d})},a.createElement("div",{className:(0,c.Z)("container padding-top--md padding-bottom--lg",se.docItemWrapper,{[se.docItemWrapperEnhanced]:h})},a.createElement(o.Zo,{components:te},r)))))}const me=function(e){const{route:{routes:t},versionMetadata:n,location:o}=e,l=t.find((e=>(0,ie.LX)(o.pathname,e)));if(!l)return a.createElement(ne.default,null);const c=l.sidebar,s=c?n.docsSidebars[c]:null;return a.createElement(a.Fragment,null,a.createElement(P.Z,null,a.createElement("html",{className:n.className})),a.createElement(i.qu,{version:n},a.createElement(i.bT,{sidebar:s},a.createElement(de,{currentDocRoute:l,versionMetadata:n,sidebarName:c},(0,r.Z)(t,{versionMetadata:n})))))}},9649:(e,t,n)=>{n.d(t,{N:()=>d,Z:()=>m});var a=n(7462),o=n(7294),r=n(6010),l=n(5999),c=n(3810);const i="anchorWithStickyNavbar_y2LR",s="anchorWithHideOnScrollNavbar_3ly5",d=e=>{let{...t}=e;return o.createElement("header",null,o.createElement("h1",(0,a.Z)({},t,{id:void 0}),t.children))},m=e=>{return"h1"===e?d:(t=e,e=>{let{id:n,...d}=e;const{navbar:{hideOnScroll:m}}=(0,c.LU)();return n?o.createElement(t,(0,a.Z)({},d,{className:(0,r.Z)("anchor",{[s]:m,[i]:!m}),id:n}),d.children,o.createElement("a",{"aria-hidden":"true",className:"hash-link",href:`#${n}`,title:(0,l.I)({id:"theme.common.headingLinkTitle",message:"Direct link to heading",description:"Title for link to heading"})},"\u200b")):o.createElement(t,d)});var t}},4608:(e,t,n)=>{n.r(t),n.d(t,{default:()=>l});var a=n(7294),o=n(8882),r=n(5999);const l=function(){return a.createElement(o.Z,{title:(0,r.I)({id:"theme.NotFound.title",message:"Page Not Found"})},a.createElement("main",{className:"container 
margin-vert--xl"},a.createElement("div",{className:"row"},a.createElement("div",{className:"col col--6 col--offset-3"},a.createElement("h1",{className:"hero__title"},a.createElement(r.Z,{id:"theme.NotFound.title",description:"The title of the 404 page"},"Page Not Found")),a.createElement("p",null,a.createElement(r.Z,{id:"theme.NotFound.p1",description:"The first paragraph of the 404 page"},"We could not find what you were looking for.")),a.createElement("p",null,a.createElement(r.Z,{id:"theme.NotFound.p2",description:"The 2nd paragraph of the 404 page"},"Please contact the owner of the site that linked you to the original URL and let them know their link is broken."))))))}}}]); \ No newline at end of file diff --git a/assets/js/1be78505.13aec0d0.js b/assets/js/1be78505.13aec0d0.js deleted file mode 100644 index 7e3e60bc..00000000 --- a/assets/js/1be78505.13aec0d0.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9514,4608],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>b});var a=n(67294);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var i=a.createContext({}),s=function(e){var t=a.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},d=function(e){var t=s(e.components);return a.createElement(i.Provider,{value:t},e.children)},m="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},p=a.forwardRef((function(e,t){var 
n=e.components,o=e.mdxType,r=e.originalType,i=e.parentName,d=c(e,["components","mdxType","originalType","parentName"]),m=s(n),p=o,b=m["".concat(i,".").concat(p)]||m[p]||u[p]||r;return n?a.createElement(b,l(l({ref:t},d),{},{components:n})):a.createElement(b,l({ref:t},d))}));function b(e,t){var n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var r=n.length,l=new Array(r);l[0]=p;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[m]="string"==typeof e?e:o,l[1]=c;for(var s=2;s{n.r(t),n.d(t,{default:()=>me});var a=n(67294),o=n(3905),r=n(46291),l=n(18882),c=n(86010),i=n(53810),s=n(93783),d=n(55537),m=n(87462);const u=function(e){return a.createElement("svg",(0,m.Z)({width:"20",height:"20","aria-hidden":"true"},e),a.createElement("g",{fill:"#7a7a7a"},a.createElement("path",{d:"M9.992 10.023c0 .2-.062.399-.172.547l-4.996 7.492a.982.982 0 01-.828.454H1c-.55 0-1-.453-1-1 0-.2.059-.403.168-.551l4.629-6.942L.168 3.078A.939.939 0 010 2.528c0-.548.45-.997 1-.997h2.996c.352 0 .649.18.828.45L9.82 9.472c.11.148.172.347.172.55zm0 0"}),a.createElement("path",{d:"M19.98 10.023c0 .2-.058.399-.168.547l-4.996 7.492a.987.987 0 01-.828.454h-3c-.547 0-.996-.453-.996-1 0-.2.059-.403.168-.551l4.625-6.942-4.625-6.945a.939.939 0 01-.168-.55 1 1 0 01.996-.997h3c.348 0 .649.18.828.45l4.996 7.492c.11.148.168.347.168.55zm0 0"})))};var p=n(95999),b=n(39960),h=n(13919),y=n(90541);const g={menuLinkText:"menuLinkText_OKON",hasHref:"hasHref_TwRn"};var f=n(72389);const v=(0,a.memo)((e=>{let{items:t,...n}=e;return a.createElement(a.Fragment,null,t.map(((e,t)=>a.createElement(k,(0,m.Z)({key:t,item:e},n)))))}));function k(e){let{item:t,...n}=e;return"category"===t.type?0===t.items.length?null:a.createElement(E,(0,m.Z)({item:t},n)):a.createElement(T,(0,m.Z)({item:t},n))}function E(e){let{item:t,onItemClick:n,activePath:o,level:r,...l}=e;const{items:s,label:d,collapsible:u,className:h,href:y}=t,k=function(e){const 
t=(0,f.Z)();return(0,a.useMemo)((()=>e.href?e.href:!t&&e.collapsible?(0,i.Wl)(e):void 0),[e,t])}(t),E=(0,i._F)(t,o),{collapsed:T,setCollapsed:N,toggleCollapsed:C}=(0,i.uR)({initialState:()=>!!u&&(!E&&t.collapsed)});return function(e){let{isActive:t,collapsed:n,setCollapsed:o}=e;const r=(0,i.D9)(t);(0,a.useEffect)((()=>{t&&!r&&n&&o(!1)}),[t,r,n,o])}({isActive:E,collapsed:T,setCollapsed:N}),a.createElement("li",{className:(0,c.Z)(i.kM.docs.docSidebarItemCategory,i.kM.docs.docSidebarItemCategoryLevel(r),"menu__list-item",{"menu__list-item--collapsed":T},h)},a.createElement("div",{className:"menu__list-item-collapsible"},a.createElement(b.Z,(0,m.Z)({className:(0,c.Z)("menu__link",{"menu__link--sublist":u&&!y,"menu__link--active":E,[g.menuLinkText]:!u,[g.hasHref]:!!k}),onClick:u?e=>{n?.(t),y?N(!1):(e.preventDefault(),C())}:()=>{n?.(t)},href:u?k??"#":k},l),d),y&&u&&a.createElement("button",{"aria-label":(0,p.I)({id:"theme.DocSidebarItem.toggleCollapsedCategoryAriaLabel",message:"Toggle the collapsible sidebar category '{label}'",description:"The ARIA label to toggle the collapsible sidebar category"},{label:d}),type:"button",className:"clean-btn menu__caret",onClick:e=>{e.preventDefault(),C()}})),a.createElement(i.zF,{lazy:!0,as:"ul",className:"menu__list",collapsed:T},a.createElement(v,{items:s,tabIndex:T?-1:0,onItemClick:n,activePath:o,level:r+1})))}function T(e){let{item:t,onItemClick:n,activePath:o,level:r,...l}=e;const{href:s,label:d,className:u}=t,p=(0,i._F)(t,o);return a.createElement("li",{className:(0,c.Z)(i.kM.docs.docSidebarItemLink,i.kM.docs.docSidebarItemLinkLevel(r),"menu__list-item",u),key:d},a.createElement(b.Z,(0,m.Z)({className:(0,c.Z)("menu__link",{"menu__link--active":p}),"aria-current":p?"page":void 0,to:s},(0,h.Z)(s)&&{onClick:n?()=>n(t):void 0},l),(0,h.Z)(s)?d:a.createElement("span",null,d,a.createElement(y.Z,null))))}const 
N={sidebar:"sidebar_a3j0",sidebarWithHideableNavbar:"sidebarWithHideableNavbar_VlPv",sidebarHidden:"sidebarHidden_OqfG",sidebarLogo:"sidebarLogo_hmkv",menu:"menu_cyFh",menuWithAnnouncementBar:"menuWithAnnouncementBar_+O1J",collapseSidebarButton:"collapseSidebarButton_eoK2",collapseSidebarButtonIcon:"collapseSidebarButtonIcon_e+kA",sidebarMenuIcon:"sidebarMenuIcon_iZzd",sidebarMenuCloseIcon:"sidebarMenuCloseIcon_6kU2"};function C(e){let{onClick:t}=e;return a.createElement("button",{type:"button",title:(0,p.I)({id:"theme.docs.sidebar.collapseButtonTitle",message:"Collapse sidebar",description:"The title attribute for collapse button of doc sidebar"}),"aria-label":(0,p.I)({id:"theme.docs.sidebar.collapseButtonAriaLabel",message:"Collapse sidebar",description:"The title attribute for collapse button of doc sidebar"}),className:(0,c.Z)("button button--secondary button--outline",N.collapseSidebarButton),onClick:t},a.createElement(u,{className:N.collapseSidebarButtonIcon}))}function _(e){let{path:t,sidebar:n,onCollapse:o,isHidden:r}=e;const l=function(){const{isActive:e}=(0,i.nT)(),[t,n]=(0,a.useState)(e);return(0,i.RF)((t=>{let{scrollY:a}=t;e&&n(0===a)}),[e]),e&&t}(),{navbar:{hideOnScroll:s},hideableSidebar:m}=(0,i.LU)();return a.createElement("div",{className:(0,c.Z)(N.sidebar,{[N.sidebarWithHideableNavbar]:s,[N.sidebarHidden]:r})},s&&a.createElement(d.Z,{tabIndex:-1,className:N.sidebarLogo}),a.createElement("nav",{className:(0,c.Z)("menu thin-scrollbar",N.menu,{[N.menuWithAnnouncementBar]:l})},a.createElement("ul",{className:(0,c.Z)(i.kM.docs.docSidebarMenu,"menu__list")},a.createElement(v,{items:n,activePath:t,level:1}))),m&&a.createElement(C,{onClick:o}))}const Z=e=>{let{toggleSidebar:t,sidebar:n,path:o}=e;return a.createElement("ul",{className:(0,c.Z)(i.kM.docs.docSidebarMenu,"menu__list")},a.createElement(v,{items:n,activePath:o,onItemClick:e=>{"category"===e.type&&e.href&&t(),"link"===e.type&&t()},level:1}))};function S(e){return 
a.createElement(i.Cv,{component:Z,props:e})}const O=a.memo(_),I=a.memo(S);function w(e){const t=(0,s.Z)(),n="desktop"===t||"ssr"===t,o="mobile"===t;return a.createElement(a.Fragment,null,n&&a.createElement(O,e),o&&a.createElement(I,e))}var P=n(12859);const x={plain:{backgroundColor:"#2a2734",color:"#9a86fd"},styles:[{types:["comment","prolog","doctype","cdata","punctuation"],style:{color:"#6c6783"}},{types:["namespace"],style:{opacity:.7}},{types:["tag","operator","number"],style:{color:"#e09142"}},{types:["property","function"],style:{color:"#9a86fd"}},{types:["tag-id","selector","atrule-id"],style:{color:"#eeebff"}},{types:["attr-name"],style:{color:"#c4b9fe"}},{types:["boolean","string","entity","url","attr-value","keyword","control","directive","unit","statement","regex","atrule","placeholder","variable"],style:{color:"#ffcc99"}},{types:["deleted"],style:{textDecorationLine:"line-through"}},{types:["inserted"],style:{textDecorationLine:"underline"}},{types:["italic"],style:{fontStyle:"italic"}},{types:["important","bold"],style:{fontWeight:"bold"}},{types:["important"],style:{color:"#c4b9fe"}}]};var L={Prism:n(87410).default,theme:x};function B(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function A(){return A=Object.assign||function(e){for(var t=1;t0&&e[n-1]===t?e:e.concat(t)};function F(e,t){var n={};for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&-1===t.indexOf(a)&&(n[a]=e[a]);return n}var R=function(e){function t(){for(var t=this,n=[],a=arguments.length;a--;)n[a]=arguments[a];e.apply(this,n),B(this,"getThemeDict",(function(e){if(void 0!==t.themeDict&&e.theme===t.prevTheme&&e.language===t.prevLanguage)return t.themeDict;t.prevTheme=e.theme,t.prevLanguage=e.language;var n=e.theme?function(e,t){var n=e.plain,a=Object.create(null),o=e.styles.reduce((function(e,n){var a=n.languages,o=n.style;return a&&!a.includes(t)||n.types.forEach((function(t){var n=A({},e[t],o);e[t]=n})),e}),a);return 
o.root=n,o.plain=A({},n,{backgroundColor:null}),o}(e.theme,e.language):void 0;return t.themeDict=n})),B(this,"getLineProps",(function(e){var n=e.key,a=e.className,o=e.style,r=A({},F(e,["key","className","style","line"]),{className:"token-line",style:void 0,key:void 0}),l=t.getThemeDict(t.props);return void 0!==l&&(r.style=l.plain),void 0!==o&&(r.style=void 0!==r.style?A({},r.style,o):o),void 0!==n&&(r.key=n),a&&(r.className+=" "+a),r})),B(this,"getStyleForToken",(function(e){var n=e.types,a=e.empty,o=n.length,r=t.getThemeDict(t.props);if(void 0!==r){if(1===o&&"plain"===n[0])return a?{display:"inline-block"}:void 0;if(1===o&&!a)return r[n[0]];var l=a?{display:"inline-block"}:{},c=n.map((function(e){return r[e]}));return Object.assign.apply(Object,[l].concat(c))}})),B(this,"getTokenProps",(function(e){var n=e.key,a=e.className,o=e.style,r=e.token,l=A({},F(e,["key","className","style","token"]),{className:"token "+r.types.join(" "),children:r.content,style:t.getStyleForToken(r),key:void 0});return void 0!==o&&(l.style=void 0!==l.style?A({},l.style,o):o),void 0!==n&&(l.key=n),a&&(l.className+=" "+a),l})),B(this,"tokenize",(function(e,t,n,a){var o={code:t,grammar:n,language:a,tokens:[]};e.hooks.run("before-tokenize",o);var r=o.tokens=e.tokenize(o.code,o.grammar,o.language);return e.hooks.run("after-tokenize",o),r}))}return e&&(t.__proto__=e),t.prototype=Object.create(e&&e.prototype),t.prototype.constructor=t,t.prototype.render=function(){var e=this.props,t=e.Prism,n=e.language,a=e.code,o=e.children,r=this.getThemeDict(this.props),l=t.languages[n];return o({tokens:function(e){for(var t=[[]],n=[e],a=[0],o=[e.length],r=0,l=0,c=[],i=[c];l>-1;){for(;(r=a[l]++)0?d:["plain"],s=m):(d=j(d,m.type),m.alias&&(d=j(d,m.alias)),s=m.content),"string"==typeof s){var u=s.split(M),p=u.length;c.push({types:d,content:u[0]});for(var b=1;b{const{prism:e}=(0,i.LU)(),{isDarkTheme:t}=(0,z.Z)(),n=e.theme||W,a=e.darkTheme||n;return 
t?a:n},V="codeBlockContainer_J+bg",q="codeBlockContent_csEI",Y="codeBlockTitle_oQzk",J="codeBlock_rtdJ",K="copyButton_M3SB",Q="codeBlockLines_1zSZ";function X(e){let{children:t,className:n,metastring:o,title:r}=e;const{prism:l}=(0,i.LU)(),[s,d]=(0,a.useState)(!1),[u,b]=(0,a.useState)(!1);(0,a.useEffect)((()=>{b(!0)}),[]);const h=(0,i.bc)(o)||r,y=U(),g=Array.isArray(t)?t.join(""):t,f=(0,i.Vo)(n)??l.defaultLanguage,{highlightLines:v,code:k}=(0,i.nZ)(g,o,f),E=()=>{!function(e,t){let{target:n=document.body}=void 0===t?{}:t;if("string"!=typeof e)throw new TypeError(`Expected parameter \`text\` to be a \`string\`, got \`${typeof e}\`.`);const a=document.createElement("textarea"),o=document.activeElement;a.value=e,a.setAttribute("readonly",""),a.style.contain="strict",a.style.position="absolute",a.style.left="-9999px",a.style.fontSize="12pt";const r=document.getSelection(),l=r.rangeCount>0&&r.getRangeAt(0);n.append(a),a.select(),a.selectionStart=0,a.selectionEnd=e.length;let c=!1;try{c=document.execCommand("copy")}catch{}a.remove(),l&&(r.removeAllRanges(),r.addRange(l)),o&&o.focus()}(k),d(!0),setTimeout((()=>d(!1)),2e3)};return a.createElement(H,(0,m.Z)({},L,{key:String(u),theme:y,code:k,language:f}),(e=>{let{className:t,style:o,tokens:r,getLineProps:l,getTokenProps:i}=e;return a.createElement("div",{className:(0,c.Z)(V,n)},h&&a.createElement("div",{style:o,className:Y},h),a.createElement("div",{className:(0,c.Z)(q,f)},a.createElement("pre",{tabIndex:0,className:(0,c.Z)(t,J,"thin-scrollbar"),style:o},a.createElement("code",{className:Q},r.map(((e,t)=>{1===e.length&&"\n"===e[0].content&&(e[0].content="");const n=l({line:e,key:t});return v.includes(t)&&(n.className+=" docusaurus-highlight-code-line"),a.createElement("span",(0,m.Z)({key:t},n),e.map(((e,t)=>a.createElement("span",(0,m.Z)({key:t},i({token:e,key:t}))))),a.createElement("br",null))})))),a.createElement("button",{type:"button","aria-label":(0,p.I)({id:"theme.CodeBlock.copyButtonAriaLabel",message:"Copy code to 
clipboard",description:"The ARIA label for copy code blocks button"}),className:(0,c.Z)(K,"clean-btn"),onClick:E},s?a.createElement(p.Z,{id:"theme.CodeBlock.copied",description:"The copied button label on code blocks"},"Copied"):a.createElement(p.Z,{id:"theme.CodeBlock.copy",description:"The copy button label on code blocks"},"Copy"))))}))}var $=n(39649);const G="details_h+cY";function ee(e){let{...t}=e;return a.createElement(i.PO,(0,m.Z)({},t,{className:(0,c.Z)("alert alert--info",G,t.className)}))}const te={head:e=>{const t=a.Children.map(e.children,(e=>function(e){if(e?.props?.mdxType&&e?.props?.originalType){const{mdxType:t,originalType:n,...o}=e.props;return a.createElement(e.props.originalType,o)}return e}(e)));return a.createElement(P.Z,e,t)},code:e=>{const{children:t}=e;return(0,a.isValidElement)(t)?t:t.includes("\n")?a.createElement(X,e):a.createElement("code",e)},a:e=>a.createElement(b.Z,e),pre:e=>{const{children:t}=e;return(0,a.isValidElement)(t)&&(0,a.isValidElement)(t?.props?.children)?t.props.children:a.createElement(X,(0,a.isValidElement)(t)?t?.props:{...e})},details:e=>{const t=a.Children.toArray(e.children),n=t.find((e=>"summary"===e?.props?.mdxType)),o=a.createElement(a.Fragment,null,t.filter((e=>e!==n)));return a.createElement(ee,(0,m.Z)({},e,{summary:n}),o)},h1:(0,$.Z)("h1"),h2:(0,$.Z)("h2"),h3:(0,$.Z)("h3"),h4:(0,$.Z)("h4"),h5:(0,$.Z)("h5"),h6:(0,$.Z)("h6")};var ne=n(24608);const ae="backToTopButton_i9tI",oe="backToTopButtonShow_wCmF",re=!1;function le(){const e=(0,a.useRef)(null);return{smoothScrollTop:function(){e.current=re?(window.scrollTo({top:0,behavior:"smooth"}),()=>{}):function(){let e=null;return function t(){const n=document.documentElement.scrollTop;n>0&&(e=requestAnimationFrame(t),window.scrollTo(0,Math.floor(.85*n)))}(),()=>e&&cancelAnimationFrame(e)}()},cancelScrollToTop:()=>e.current?.()}}const 
ce=function(){const[e,t]=(0,a.useState)(!1),n=(0,a.useRef)(!1),{smoothScrollTop:o,cancelScrollToTop:r}=le();return(0,i.RF)(((e,a)=>{let{scrollY:o}=e;const l=a?.scrollY;if(!l)return;if(n.current)return void(n.current=!1);const c=o{e.location.hash&&(n.current=!0,t(!1))})),a.createElement("button",{"aria-label":(0,p.I)({id:"theme.BackToTopButton.buttonAriaLabel",message:"Scroll back to top",description:"The ARIA label for the back to top button"}),className:(0,c.Z)("clean-btn",i.kM.common.backToTopButton,ae,{[oe]:e}),type:"button",onClick:()=>o()})};var ie=n(16550);const se={docPage:"docPage_lDyR",docMainContainer:"docMainContainer_r8cw",docSidebarContainer:"docSidebarContainer_0YBq",docMainContainerEnhanced:"docMainContainerEnhanced_SOUu",docSidebarContainerHidden:"docSidebarContainerHidden_Qlt2",collapsedDocSidebar:"collapsedDocSidebar_zZpm",expandSidebarButtonIcon:"expandSidebarButtonIcon_cxi8",docItemWrapperEnhanced:"docItemWrapperEnhanced_aT5H"};function de(e){let{currentDocRoute:t,versionMetadata:n,children:r,sidebarName:s}=e;const d=(0,i.Vq)(),{pluginId:m,version:b}=n,[h,y]=(0,a.useState)(!1),[g,f]=(0,a.useState)(!1),v=(0,a.useCallback)((()=>{g&&f(!1),y((e=>!e))}),[g]);return a.createElement(l.Z,{wrapperClassName:i.kM.wrapper.docsPages,pageClassName:i.kM.page.docsDocPage,searchMetadata:{version:b,tag:(0,i.os)(m,b)}},a.createElement("div",{className:se.docPage},a.createElement(ce,null),d&&a.createElement("aside",{className:(0,c.Z)(se.docSidebarContainer,{[se.docSidebarContainerHidden]:h}),onTransitionEnd:e=>{e.currentTarget.classList.contains(se.docSidebarContainer)&&h&&f(!0)}},a.createElement(w,{key:s,sidebar:d,path:t.path,onCollapse:v,isHidden:g}),g&&a.createElement("div",{className:se.collapsedDocSidebar,title:(0,p.I)({id:"theme.docs.sidebar.expandButtonTitle",message:"Expand sidebar",description:"The ARIA label and title attribute for expand button of doc sidebar"}),"aria-label":(0,p.I)({id:"theme.docs.sidebar.expandButtonAriaLabel",message:"Expand 
sidebar",description:"The ARIA label and title attribute for expand button of doc sidebar"}),tabIndex:0,role:"button",onKeyDown:v,onClick:v},a.createElement(u,{className:se.expandSidebarButtonIcon}))),a.createElement("main",{className:(0,c.Z)(se.docMainContainer,{[se.docMainContainerEnhanced]:h||!d})},a.createElement("div",{className:(0,c.Z)("container padding-top--md padding-bottom--lg",se.docItemWrapper,{[se.docItemWrapperEnhanced]:h})},a.createElement(o.Zo,{components:te},r)))))}const me=function(e){const{route:{routes:t},versionMetadata:n,location:o}=e,l=t.find((e=>(0,ie.LX)(o.pathname,e)));if(!l)return a.createElement(ne.default,null);const c=l.sidebar,s=c?n.docsSidebars[c]:null;return a.createElement(a.Fragment,null,a.createElement(P.Z,null,a.createElement("html",{className:n.className})),a.createElement(i.qu,{version:n},a.createElement(i.bT,{sidebar:s},a.createElement(de,{currentDocRoute:l,versionMetadata:n,sidebarName:c},(0,r.Z)(t,{versionMetadata:n})))))}},39649:(e,t,n)=>{n.d(t,{N:()=>d,Z:()=>m});var a=n(87462),o=n(67294),r=n(86010),l=n(95999),c=n(53810);const i="anchorWithStickyNavbar_y2LR",s="anchorWithHideOnScrollNavbar_3ly5",d=e=>{let{...t}=e;return o.createElement("header",null,o.createElement("h1",(0,a.Z)({},t,{id:void 0}),t.children))},m=e=>{return"h1"===e?d:(t=e,e=>{let{id:n,...d}=e;const{navbar:{hideOnScroll:m}}=(0,c.LU)();return n?o.createElement(t,(0,a.Z)({},d,{className:(0,r.Z)("anchor",{[s]:m,[i]:!m}),id:n}),d.children,o.createElement("a",{"aria-hidden":"true",className:"hash-link",href:`#${n}`,title:(0,l.I)({id:"theme.common.headingLinkTitle",message:"Direct link to heading",description:"Title for link to heading"})},"\u200b")):o.createElement(t,d)});var t}},24608:(e,t,n)=>{n.r(t),n.d(t,{default:()=>l});var a=n(67294),o=n(18882),r=n(95999);const l=function(){return a.createElement(o.Z,{title:(0,r.I)({id:"theme.NotFound.title",message:"Page Not Found"})},a.createElement("main",{className:"container 
margin-vert--xl"},a.createElement("div",{className:"row"},a.createElement("div",{className:"col col--6 col--offset-3"},a.createElement("h1",{className:"hero__title"},a.createElement(r.Z,{id:"theme.NotFound.title",description:"The title of the 404 page"},"Page Not Found")),a.createElement("p",null,a.createElement(r.Z,{id:"theme.NotFound.p1",description:"The first paragraph of the 404 page"},"We could not find what you were looking for.")),a.createElement("p",null,a.createElement(r.Z,{id:"theme.NotFound.p2",description:"The 2nd paragraph of the 404 page"},"Please contact the owner of the site that linked you to the original URL and let them know their link is broken."))))))}}}]); \ No newline at end of file diff --git a/assets/js/1c1afca4.271fbd6c.js b/assets/js/1c1afca4.271fbd6c.js deleted file mode 100644 index 103a90f5..00000000 --- a/assets/js/1c1afca4.271fbd6c.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5337],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},c=a.forwardRef((function(t,e){var 
n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),s=u(n),c=r,g=s["".concat(p,".").concat(c)]||s[c]||d[c]||l;return n?a.createElement(g,o(o({ref:e},m),{},{components:n})):a.createElement(g,o({ref:e},m))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.2.5/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.2.5",frontMatter:{}},p=[],u={toc:p},m="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/1c8fb1a2.e8a6f5df.js b/assets/js/1c8fb1a2.e8a6f5df.js deleted file mode 100644 index 089b0b01..00000000 --- a/assets/js/1c8fb1a2.e8a6f5df.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6433,6974,7138],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>h});var n=a(67294);function i(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(i[a]=e[a]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(i[a]=e[a])}return i}var s=n.createContext({}),m=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},d=function(e){var t=m(e.components);return n.createElement(s.Provider,{value:t},e.children)},p="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),p=m(a),u=i,h=p["".concat(s,".").concat(u)]||p[u]||c[u]||r;return a?n.createElement(h,o(o({ref:t},d),{},{components:a})):n.createElement(h,o({ref:t},d))}));function h(e,t){var a=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=a.length,o=new Array(r);o[0]=u;var l={};for(var s in 
t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:i,o[1]=l;for(var m=2;m{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.16/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"status"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}p.isMDXComponent=!0},96502:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.16/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n 
"annotationOverlap":0.42405\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"end"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}p.isMDXComponent=!0},46860:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>l,metadata:()=>m,toc:()=>d});var n=a(87462),i=(a(67294),a(3905)),r=a(35270),o=a(96502);const l={title:"MITOMAP"},s=void 0,m={unversionedId:"data-sources/mitomap",id:"version-3.16/data-sources/mitomap",title:"MITOMAP",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/mitomap.mdx",sourceDirName:"data-sources",slug:"/data-sources/mitomap",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mitomap",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/mitomap.mdx",tags:[],version:"3.16",frontMatter:{title:"MITOMAP"},sidebar:"version-3.16/docs",previous:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mito-heteroplasmy"},next:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/omim"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Scraping HTML Pages",id:"scraping-html-pages",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Allele Parsing",id:"allele-parsing",children:[],level:4}],level:3}],level:2},{value:"PostgreSQL Dump File",id:"postgresql-dump-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[{value:"Small Variants",id:"small-variants",children:[],level:3},{value:"Structural Variants",id:"structural-variants",children:[],level:3}],level:2}],p={toc:d},c="wrapper";function 
u(e){let{components:t,...l}=e;return(0,i.kt)(c,(0,n.Z)({},p,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"MITOMAP provides a compendium of polymorphisms and mutations in human mitochondrial DNA."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. ",(0,i.kt)("em",{parentName:"p"},"Current Protocols in Bioinformatics")," 1(123):1.23.1-26 (2013). ",(0,i.kt)("a",{parentName:"p",href:"http://www.mitomap.org"},"http://www.mitomap.org")))),(0,i.kt)("h2",{id:"scraping-html-pages"},"Scraping HTML Pages"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"MITOMAP is unique in that it doesn't offer the data in a downloadable format. 
As a result, the annotation content in Nirvana is scraped from the following MITOMAP pages:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsControl"},"mtDNA Control Region Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsCoding"},"mtDNA Coding Region & RNA Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsRNA"},"Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsCodingControl"},"Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/DeletionsSingle"},"Reported mtDNA Deletions")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/InsertionsSimple"},"mtDNA Simple Insertions"))),(0,i.kt)("p",null,(0,i.kt)("img",{src:a(85058).Z})),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"Here's what the HTML code looks like:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-html"},"[\"582\",\"MT-TF\",\"Mitochondrial myopathy\",\"T582C\",\"tRNA Phe\",\"-\",\"+\",\"Reported\",\"72.90% \",\"0\",\"2\"],\n[\"583\",\"MT-TF\",\"MELAS / MM & EXIT\",\"G583A\",\"tRNA Phe\",\"-\",\"+\",\"Cfrm\",\"93.10% \",\"0\",\"3\"],\n")),(0,i.kt)("p",null,"We're mainly interested in the following columns (numbers indicate the HTML page 
above):"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Position",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Disease",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Nucleotide Change",(0,i.kt)("sup",null,"1,2")),(0,i.kt)("li",{parentName:"ul"},"Allele",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Homoplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Heteroplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Status",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"MitoTIP",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"GB Seqs FL(CR)",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Deletion Junction",(0,i.kt)("sup",null,"5")),(0,i.kt)("li",{parentName:"ul"},"Insert (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"Insert Point (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"References/Curated References",(0,i.kt)("sup",null,"1,2,3,4"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"MitoTIP")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The MitoTIP information is used to populate the ",(0,i.kt)("inlineCode",{parentName:"p"},"clinicalSignificance")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"scorePercentile"),' JSON keys. 
The "frequency alert" entries are skipped since it\'s not directly relevant to clinical significance.'))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Left alignment")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Variant Enumeration")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are ",(0,i.kt)("inlineCode",{parentName:"p"},"C-C(2-8)")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"A-AC or ACC"),". 
Alternate alleles containing IUPAC ambiguity codes are similarly enumerated."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Inversions")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"MITOMAP inversions are currently treated as MNVs."))),(0,i.kt)("h4",{id:"allele-parsing"},"Allele Parsing"),(0,i.kt)("p",null,"The following MITOMAP allele parsing conventions are supported:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"C123T"),(0,i.kt)("li",{parentName:"ul"},"16021_16022del"),(0,i.kt)("li",{parentName:"ul"},"8042del2"),(0,i.kt)("li",{parentName:"ul"},"C9537insC"),(0,i.kt)("li",{parentName:"ul"},"3902_3908invACCTTGC"),(0,i.kt)("li",{parentName:"ul"},"A-AC or ACC"),(0,i.kt)("li",{parentName:"ul"},"C-C(2-8)"),(0,i.kt)("li",{parentName:"ul"},"8042delAT")),(0,i.kt)("h2",{id:"postgresql-dump-file"},"PostgreSQL Dump File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;\n1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . 
Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177\n2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. 
The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534\n")),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"id"),(0,i.kt)("li",{parentName:"ul"},"nlmid")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Why not use the PostgreSQL file for everything?")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in."))),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 
1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Duplicated records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown."),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"For diseases and PubMed IDs, we take the union of the values in the duplicated records."),(0,i.kt)("li",{parentName:"ul"},"For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.")))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Skipped records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Records that represent an alternate notation of the original variant are skipped. 
Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped."))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"see ",(0,i.kt)("a",{parentName:"li",href:"#example"},"HTML Pages")," above"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/downloads/mitomap.dump.sql.gz"},"PostgreSQL dump file"))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("h3",{id:"small-variants"},"Small Variants"),(0,i.kt)(r.default,{mdxType:"SmallJSON"}),(0,i.kt)("h3",{id:"structural-variants"},"Structural Variants"),(0,i.kt)(o.default,{mdxType:"SVJSON"}))}u.isMDXComponent=!0},85058:(e,t,a)=>{a.d(t,{Z:()=>n});const n=a.p+"assets/images/MITOMAP-d8d4dd35c2336fdba5fcced77ec438e6.png"}}]); \ No newline at end of file diff --git a/assets/js/1f54683e.416ee99f.js b/assets/js/1f54683e.416ee99f.js deleted file mode 100644 index 240989b1..00000000 --- a/assets/js/1f54683e.416ee99f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[753,3499],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return 
a.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=d(n),u=r,v=p["".concat(s,".").concat(u)]||p[u]||m[u]||o;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/topmed-json",id:"version-3.18/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/topmed-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],d={toc:s},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}p.isMDXComponent=!0},33276:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(89201);const i={title:"TOPMed"},l=void 
0,s={unversionedId:"data-sources/topmed",id:"version-3.18/data-sources/topmed",title:"TOPMed",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/topmed.mdx",sourceDirName:"data-sources",slug:"/data-sources/topmed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/topmed",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/topmed.mdx",tags:[],version:"3.18",frontMatter:{title:"TOPMed"},sidebar:"docs",previous:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/splice-ai"},next:{title:"Nirvana JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/file-formats/nirvana-json-file-format"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF extraction",id:"vcf-extraction",children:[],level:2},{value:"GRCh37 liftover",id:"grch37-liftover",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON output",id:"json-output",children:[],level:2}],c={toc:d},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"https://www.nhlbi.nih.gov/science/trans-omics-precision-medicine-topmed-program"},"Trans-Omics for Precision Medicine")," (TOPMed) program, sponsored by the National Institutes of Health (NIH) National Heart, Lung and Blood Institute (NHLBI), is part of a broader Precision Medicine Initiative, which aims to provide disease treatments tailored to an individual\u2019s unique genes and environment. 
TOPMed contributes to this Initiative through the integration of whole-genome sequencing (WGS) and other omics (e.g., metabolic profiles, epigenomics, protein and RNA expression patterns) data with molecular, behavioral, imaging, environmental, and clinical data."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Kowalski, M.H., Qian, H., Hou, Z., Rosen, J.D., Tapia, A.L., Shan, Y., Jain, D., Argos, M., Arnett, D.K., Avery, C. and Barnes, K.C., 2019. Use of> 100,000 NHLBI Trans-Omics for Precision Medicine (TOPMed) Consortium whole genome sequences improves imputation quality and detection of rare variant associations in admixed African and Hispanic/Latino populations. 
",(0,r.kt)("em",{parentName:"p"},"PLoS genetics"),", ",(0,r.kt)("strong",{parentName:"p"},"15(12)"),", p.e1008500."))),(0,r.kt)("h2",{id:"vcf-extraction"},"VCF extraction"),(0,r.kt)("p",null,"We currently extract the following fields from TOPMed VCF file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,r.kt)("p",null,"Example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 10132 TOPMed_freeze_5?chr1:10,132 T C 255 SVM VRT=1;NS=62784;AN=125568;AC=32;AF=0.000254842;Het=32;Hom=0 NA:FRQ 125568:0.000254842\n")),(0,r.kt)("h2",{id:"grch37-liftover"},"GRCh37 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh37 on TOPMed website. We performed a liftover from GRCh38 to GRCh37 using dbSNP ids."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://bravo.sph.umich.edu/freeze5/hg38/download"},"https://bravo.sph.umich.edu/freeze5/hg38/download")),(0,r.kt)("h2",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/1f664332.a707c097.js b/assets/js/1f664332.a707c097.js deleted file mode 100644 index 36a598f1..00000000 --- a/assets/js/1f664332.a707c097.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4680],{3905:(t,e,r)=>{r.d(e,{Zo:()=>m,kt:()=>f});var n=r(67294);function a(t,e,r){return e in t?Object.defineProperty(t,e,{value:r,enumerable:!0,configurable:!0,writable:!0}):t[e]=r,t}function o(t,e){var r=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),r.push.apply(r,n)}return r}function i(t){for(var e=1;e=0||(a[r]=t[r]);return a}(t,e);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,r)&&(a[r]=t[r])}return a}var p=n.createContext({}),c=function(t){var e=n.useContext(p),r=e;return t&&(r="function"==typeof t?t(e):i(i({},e),t)),r},m=function(t){var e=c(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},d=n.forwardRef((function(t,e){var r=t.components,a=t.mdxType,o=t.originalType,p=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),s=c(r),d=a,f=s["".concat(p,".").concat(d)]||s[d]||u[d]||o;return r?n.createElement(f,i(i({ref:e},m),{},{components:r})):n.createElement(f,i({ref:e},m))}));function f(t,e){var r=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=r.length,i=new Array(o);i[0]=d;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[s]="string"==typeof t?t:a,i[1]=l;for(var c=2;c{r.r(e),r.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>o,metadata:()=>l,toc:()=>p});var n=r(87462),a=(r(67294),r(3905));const o={},i=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.18/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],c={toc:p},m="wrapper";function 
s(t){let{components:e,...r}=t;return(0,a.kt)(m,(0,n.Z)({},c,r,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"end"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/1fda6d09.cb728c37.js b/assets/js/1fda6d09.cb728c37.js deleted file mode 100644 index e116d893..00000000 --- a/assets/js/1fda6d09.cb728c37.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7821],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),m=c(n),u=r,v=m["".concat(s,".").concat(u)]||m[u]||d[u]||l;return n?a.createElement(v,i(i({ref:t},p),{},{components:n})):a.createElement(v,i({ref:t},p))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,i=new Array(l);i[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:r,i[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var 
a=n(87462),r=(n(67294),n(3905));const l={title:"Variant IDs"},i=void 0,o={unversionedId:"core-functionality/variant-ids",id:"version-3.21/core-functionality/variant-ids",title:"Variant IDs",description:"Overview",source:"@site/versioned_docs/version-3.21/core-functionality/variant-ids.md",sourceDirName:"core-functionality",slug:"/core-functionality/variant-ids",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/variant-ids",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/core-functionality/variant-ids.md",tags:[],version:"3.21",frontMatter:{title:"Variant IDs"},sidebar:"docs",previous:{title:"MNV Recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/mnv-recomposition"},next:{title:"Jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/utilities/jasix"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF Examples",id:"vcf-examples",children:[],level:3},{value:"Format",id:"format",children:[],level:3},{value:"VID Examples",id:"vid-examples",children:[],level:3}],level:2},{value:"Translocation Breakends",id:"translocation-breakends",children:[{value:"VCF Example",id:"vcf-example",children:[],level:3},{value:"Format",id:"format-1",children:[],level:3},{value:"VID Example",id:"vid-example",children:[],level:3}],level:2},{value:"All Other Structural Variants",id:"all-other-structural-variants",children:[{value:"VCF Examples",id:"vcf-examples-1",children:[],level:3},{value:"Format",id:"format-2",children:[],level:3},{value:"VID Examples",id:"vid-examples-1",children:[],level:3}],level:2}],c={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Many downstream tools use a variant identifier to store annotation results. 
We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute."),(0,r.kt)("p",null,"The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. Our VID scheme attempts to fill that gap."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Conventions")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("ul",{parentName:"div"},(0,r.kt)("li",{parentName:"ul"},"all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)"),(0,r.kt)("li",{parentName:"ul"},"for a reference variant (i.e. no alt allele), replace the period (.) with the reference base"),(0,r.kt)("li",{parentName:"ul"},"padding bases are used, neither the reference nor alternate allele can be empty"),(0,r.kt)("li",{parentName:"ul"},"some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base")))),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-examples"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 66507 . T A 184.45 PASS .\nchr1 66521 . T TATATA 144.53 PASS .\nchr1 66572 . 
GTA G,GTACTATATATTATA 45.45 PASS .\n")),(0,r.kt)("h3",{id:"format"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-examples"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-66507-T-A"),(0,r.kt)("li",{parentName:"ul"},"1-66521-T-TATATA"),(0,r.kt)("li",{parentName:"ul"},"1-66572-GTA-G"),(0,r.kt)("li",{parentName:"ul"},"1-66572-G-GTACTATATATTA")),(0,r.kt)("h2",{id:"translocation-breakends"},"Translocation Breakends"),(0,r.kt)("h3",{id:"vcf-example"},"VCF Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 2617277 . A AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[ . PASS SVTYPE=BND\n")),(0,r.kt)("h3",{id:"format-1"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-example"},"VID Example"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[")),(0,r.kt)("h2",{id:"all-other-structural-variants"},"All Other Structural Variants"),(0,r.kt)("h3",{id:"vcf-examples-1"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 1000 . G . PASS END=3001000;SVTYPE=ROH\nchr1 1350082 . G . PASS END=1351320;SVTYPE=DEL\nchr1 1477854 . C . PASS END=1477984;SVTYPE=DUP\nchr1 1477968 . T . PASS END=1477968;SVTYPE=INS\nchr1 1715898 . N . PASS SVTYPE=CNV;END=1750149\nchr1 2650426 . N . PASS SVTYPE=CNV;END=2653074\nchr2 321682 . T . PASS SVTYPE=INV;END=421681\nchr20 2633403 . G . 
PASS END=2633421\n")),(0,r.kt)("h3",{id:"format-2"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"end position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"SVTYPE")),(0,r.kt)("h3",{id:"vid-examples-1"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-1000-3001000-G-","<","ROH",">","-ROH"),(0,r.kt)("li",{parentName:"ul"},"1-1350082-1351320-G-","<","DEL",">","-DEL"),(0,r.kt)("li",{parentName:"ul"},"1-1477854-1477984-C-","<","DUP:TANDEM",">","-DUP"),(0,r.kt)("li",{parentName:"ul"},"1-1477968-1477968-T-","<","INS",">","-INS"),(0,r.kt)("li",{parentName:"ul"},"1-1715898-1750149-A-","<","DUP",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(replace the N with A)")),(0,r.kt)("li",{parentName:"ul"},"1-2650426-2653074-N-","<","DEL",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(keep the N)")),(0,r.kt)("li",{parentName:"ul"},"2-321682-421681-T-","<","INV",">","-INV"),(0,r.kt)("li",{parentName:"ul"},"20-2633403-2633421-G-","<","STR2",">","-STR")))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/214b07a5.67265c2d.js b/assets/js/214b07a5.67265c2d.js deleted file mode 100644 index da4bf1e4..00000000 --- a/assets/js/214b07a5.67265c2d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7339],{3905:(M,L,t)=>{t.d(L,{Zo:()=>o,kt:()=>C});var i=t(67294);function e(M,L,t){return L in M?Object.defineProperty(M,L,{value:t,enumerable:!0,configurable:!0,writable:!0}):M[L]=t,M}function j(M,L){var t=Object.keys(M);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(M);L&&(i=i.filter((function(L){return 
Object.getOwnPropertyDescriptor(M,L).enumerable}))),t.push.apply(t,i)}return t}function u(M){for(var L=1;L=0||(e[t]=M[t]);return e}(M,L);if(Object.getOwnPropertySymbols){var j=Object.getOwnPropertySymbols(M);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(M,t)&&(e[t]=M[t])}return e}var N=i.createContext({}),n=function(M){var L=i.useContext(N),t=L;return M&&(t="function"==typeof M?M(L):u(u({},L),M)),t},o=function(M){var L=n(M.components);return i.createElement(N.Provider,{value:L},M.children)},s="mdxType",w={inlineCode:"code",wrapper:function(M){var L=M.children;return i.createElement(i.Fragment,{},L)}},y=i.forwardRef((function(M,L){var t=M.components,e=M.mdxType,j=M.originalType,N=M.parentName,o=a(M,["components","mdxType","originalType","parentName"]),s=n(t),y=e,C=s["".concat(N,".").concat(y)]||s[y]||w[y]||j;return t?i.createElement(C,u(u({ref:L},o),{},{components:t})):i.createElement(C,u({ref:L},o))}));function C(M,L){var t=arguments,e=L&&L.mdxType;if("string"==typeof M||e){var j=t.length,u=new Array(j);u[0]=y;var a={};for(var N in L)hasOwnProperty.call(L,N)&&(a[N]=L[N]);a.originalType=M,a[s]="string"==typeof M?M:e,u[1]=a;for(var n=2;n{t.d(L,{Z:()=>e});var i=t(67294);function e(M){let{className:L,name:t,children:e,githubUrl:j,twitterUrl:u}=M;return i.createElement("div",{className:L},i.createElement("div",{className:"card card--full-height"},i.createElement("div",{className:"card__header"},i.createElement("div",{className:"avatar avatar--vertical"},i.createElement("img",{className:"avatar__photo avatar__photo--xl",src:j+".png"}),i.createElement("div",{className:"avatar__intro"},i.createElement("h3",{className:"avatar__name"},t)))),i.createElement("div",{className:"card__body"},e),i.createElement("div",{className:"card__footer"},i.createElement("div",{className:"button-group button-group--block"},j&&i.createElement("a",{className:"button button--secondary",href:j},"GitHub"),u&&i.createElement("a",{className:"button 
button--secondary",href:u},"Twitter")))))}},29875:(M,L,t)=>{t.r(L),t.d(L,{TeamProfileCardCol:()=>o,contentTitle:()=>a,default:()=>y,frontMatter:()=>u,metadata:()=>N,toc:()=>n});var i=t(87462),e=(t(67294),t(3905)),j=t(63427);const u={id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},a=void 0,N={unversionedId:"introduction/introduction",id:"version-3.18/introduction/introduction",title:"Introduction",description:"Clinical-grade variant annotation",source:"@site/versioned_docs/version-3.18/introduction/introduction.mdx",sourceDirName:"introduction",slug:"/",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/introduction/introduction.mdx",tags:[],version:"3.18",frontMatter:{id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},sidebar:"docs",next:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/dependencies"}},n=[{value:"What does Nirvana annotate?",id:"what-does-nirvana-annotate",children:[],level:2},{value:"Licensing",id:"licensing",children:[{value:"Code",id:"code",children:[],level:3},{value:"Data",id:"data",children:[],level:3}],level:2},{value:"Nirvana Team",id:"nirvana-team",children:[{value:"Active Team",id:"active-team",children:[],level:3},{value:"Honorary Alumni",id:"honorary-alumni",children:[],level:3}],level:2}];function o(M){return(0,e.kt)(j.Z,(0,i.Z)({},M,{className:"col col--6 margin-bottom--lg",mdxType:"TeamProfileCard"}))}const s={toc:n,TeamProfileCardCol:o},w="wrapper";function y(M){let{components:L,...j}=M;return(0,e.kt)(w,(0,i.Z)({},s,j,{components:L,mdxType:"MDXLayout"}),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(68054).Z})),(0,e.kt)("p",null,"Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene 
fusions, and SVs (including CNVs). It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation."),(0,e.kt)("p",null,"The input to Nirvana are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Nirvana handles multiple alternate alleles and multiple samples with ease."),(0,e.kt)("p",null,"The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily."),(0,e.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,e.kt)("div",{parentName:"div",className:"admonition-heading"},(0,e.kt)("h5",{parentName:"div"},(0,e.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,e.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,e.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Fun Fact")),(0,e.kt)("div",{parentName:"div",className:"admonition-content"},(0,e.kt)("p",{parentName:"div"},"Nirvana is a backronym for ",(0,e.kt)("strong",{parentName:"p"},"NI"),"mble and ",(0,e.kt)("strong",{parentName:"p"},"R"),"obust ",(0,e.kt)("strong",{parentName:"p"},"VA"),"riant 
a",(0,e.kt)("strong",{parentName:"p"},"N"),"not",(0,e.kt)("strong",{parentName:"p"},"A"),"tor"))),(0,e.kt)("h2",{id:"what-does-nirvana-annotate"},"What does Nirvana annotate?"),(0,e.kt)("p",null,"We use Sequence Ontology consequences to describe how each variant impacts a given transcript:"),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(9876).Z})),(0,e.kt)("p",null,"In addition, we also use external data sources to provide additional context for each variant:"),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(53541).Z})),(0,e.kt)("h2",{id:"licensing"},"Licensing"),(0,e.kt)("h3",{id:"code"},"Code"),(0,e.kt)("p",null,"Nirvana source code is provided under the ",(0,e.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/blob/develop/LICENSE"},"GPLv3")," license. Nirvana includes several third party packages provided under other open source licenses, please see ",(0,e.kt)("a",{parentName:"p",href:"introduction/dependencies"},"Dependencies")," for additional details."),(0,e.kt)("h3",{id:"data"},"Data"),(0,e.kt)("p",null,"The data used by Nirvana is publicly available, however some data sources have special restrictions on use by non-academic entities."),(0,e.kt)("h2",{id:"nirvana-team"},"Nirvana Team"),(0,e.kt)("h3",{id:"active-team"},"Active Team"),(0,e.kt)("p",null,"The Nirvana team works on the core functionality, AWS annotation services, in addition to keeping the annotation data sources up-to-date."),(0,e.kt)("p",null,"Current members of the Nirvana team are listed in alphabetical order below."),(0,e.kt)("div",{className:"row"},(0,e.kt)(o,{name:"Fahd Siddiqui",githubUrl:"https://github.com/Fahd-Siddiqui",mdxType:"TeamProfileCardCol"},"Joined our team back in December 2021 and brings even more cloud and ML experience to our team."),(0,e.kt)(o,{name:"Joseph Platzer",githubUrl:"https://github.com/jplatzer2",mdxType:"TeamProfileCardCol"},"Test Lead. 
Joins Nirvana with a history of building sequencing tools and keeping the customer first."),(0,e.kt)(o,{name:"Michael Str\xf6mberg",githubUrl:"https://github.com/MichaelStromberg",mdxType:"TeamProfileCardCol"},"Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it."),(0,e.kt)(o,{name:"Ningxin Ouyang",githubUrl:"https://github.com/N-Ouyang",mdxType:"TeamProfileCardCol"},"Our newest addition to the team with a wealth of experience in transcript factor footprinting."),(0,e.kt)(o,{name:"Rajat Shuvro Roy",githubUrl:"https://github.com/rajatshuvro",mdxType:"TeamProfileCardCol"},"Lead developer. Loves to speed up things and make services available to all interested users.")),(0,e.kt)("h3",{id:"honorary-alumni"},"Honorary Alumni"),(0,e.kt)("p",null,"Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things."),(0,e.kt)("div",{className:"row"},(0,e.kt)(o,{name:"Haochen Li",githubUrl:"https://github.com/haochenl",mdxType:"TeamProfileCardCol"},"Detail-oriented quick thinker that keeps cool even in the most stressful situations. Now working as a Senior Bioinformatics Data Scientist at GRAIL."),(0,e.kt)(o,{name:"Julien Lajugie",githubUrl:"https://github.com/JulienLajugie",mdxType:"TeamProfileCardCol"},"Julien is a legend around these parts. When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place."),(0,e.kt)(o,{name:"Shuli Kang",githubUrl:"https://github.com/shulik7",mdxType:"TeamProfileCardCol"},"Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies."),(0,e.kt)(o,{name:"Yu Jiang",githubUrl:"https://github.com/yujiang02",mdxType:"TeamProfileCardCol"},"Biostatistics genius from Duke University before joining our team at Illumina. 
Now working as a Research Engineer at Facebook AI Research.")))}y.isMDXComponent=!0},68054:(M,L,t)=>{t.d(L,{Z:()=>i});const i=""},53541:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/SupplementaryAnnotations-d43d3f1c837f9b80fab530432e0e4b1d.svg"},9876:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/TranscriptConsequences-60ca1c43a36dacf896fecdabf09ce02c.svg"}}]); \ No newline at end of file diff --git a/assets/js/220878dc.49431aa6.js b/assets/js/220878dc.49431aa6.js deleted file mode 100644 index 19c33e6d..00000000 --- a/assets/js/220878dc.49431aa6.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[970,2031],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var l=a.createContext({}),c=function(e){var t=a.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(l.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),d=c(n),u=r,v=d["".concat(l,".").concat(u)]||d[u]||m[u]||i;return n?a.createElement(v,o(o({ref:t},p),{},{components:n})):a.createElement(v,o({ref:t},p))}));function v(e,t){var 
n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s[d]="string"==typeof e?e:r,o[1]=s;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>s,toc:()=>l});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,s={unversionedId:"data-sources/primate-ai-json",id:"version-3.18/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/primate-ai-json.md",tags:[],version:"3.18",frontMatter:{}},l=[],c={toc:l},p="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 
1.0")))))}d.isMDXComponent=!0},49890:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>m,frontMatter:()=>o,metadata:()=>l,toc:()=>c});var a=n(87462),r=(n(67294),n(3905)),i=n(20837);const o={title:"Primate AI"},s=void 0,l={unversionedId:"data-sources/primate-ai",id:"version-3.18/data-sources/primate-ai",title:"Primate AI",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/primate-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/primate-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/primate-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/primate-ai.mdx",tags:[],version:"3.18",frontMatter:{title:"Primate AI"},sidebar:"docs",previous:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/phylop"},next:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/revel"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"TSV File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Converting UCSC IDs",id:"converting-ucsc-ids",children:[],level:3},{value:"Running the Pre-Processor",id:"running-the-pre-processor",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],p={toc:c},d="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations. 
The method is described in the publication:"),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. ",(0,r.kt)("em",{parentName:"p"},"Nat Genet")," ",(0,r.kt)("strong",{parentName:"p"},"50"),", 1161\u20131170 (2018). ",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/s41588-018-0167-z"},"https://doi.org/10.1038/s41588-018-0167-z")))),(0,r.kt)("h2",{id:"tsv-file"},"TSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr pos ref alt refAA altAA strand_1pos_0neg trinucleotide_context UCSC_gene ExAC_coverage primateDL_score\nchr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239\nchr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the TSV file, we're mainly interested in the following 
columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"primateDL_score"))),(0,r.kt)("p",null,"We also use ",(0,r.kt)("inlineCode",{parentName:"p"},"UCSC_gene")," to filter out variants that don't have matching gene models in Nirvana."),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"converting-ucsc-ids"},"Converting UCSC IDs"),(0,r.kt)("p",null,"Primate AI only provides UCSC IDs. As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs."),(0,r.kt)("p",null,"The following queries are used to download the conversions from UCSC:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},'mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv\n\nmysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \\\n hg19 > ucsc_ensembl.tsv\n')),(0,r.kt)("h3",{id:"running-the-pre-processor"},"Running the Pre-Processor"),(0,r.kt)("p",null,"The Primate AI pre-processor can be run as follows:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \\\n ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz\n")),(0,r.kt)("p",null,"During conversion, 0.5% of the UCSC Ids cannot be converted to either Entrez or Ensembl gene IDs. 
Once the gene IDs have been acquired, we check to see which are available in Nirvana."),(0,r.kt)("p",null,"The following Entrez Gene IDs were not found:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"399753\n401980\n504189\n504191\n100293534\n")),(0,r.kt)("p",null,"Here is the output from the pre-processor:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.\n- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.\n- loading UGA gene ID to gene dictionary... 103,277 genes loaded.\n- parsing Primate AI variants... 70,121,953 variants parsed.\n \n# variants with unknown gene ID: 27,253 / 70,121,953\n# genes with unknown gene ID: 109 / 19,614\n \n# variants not in UGA: 2,036 / 70,121,953\n# genes not in UGA: 6 / 19,614\n")),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"The Primate AI data set provides raw scores, but the scores are biased according to gene context. I.e. 
a 0.4 means something different in ",(0,r.kt)("inlineCode",{parentName:"p"},"TP53")," than it does in ",(0,r.kt)("inlineCode",{parentName:"p"},"KRAS"),"."),(0,r.kt)("p",{parentName:"div"},"As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. According to their research, the 25",(0,r.kt)("sup",null,"th")," percentile is a good proxy for benign variants and the 75",(0,r.kt)("sup",null,"th")," percentile is a good proxy for pathogenic variants."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/cPgCSmecvhb4"},"https://basespace.illumina.com/s/cPgCSmecvhb4")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/23648e4e.dfea1b13.js b/assets/js/23648e4e.dfea1b13.js deleted file mode 100644 index ed15c87c..00000000 --- a/assets/js/23648e4e.dfea1b13.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[60,8680,5578,1779],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>N});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},c=function(e){var t=m(e.components);return 
a.createElement(s.Provider,{value:t},e.children)},d="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),d=m(n),u=i,N=d["".concat(s,".").concat(u)]||d[u]||p[u]||r;return n?a.createElement(N,l(l({ref:t},c),{},{components:n})):a.createElement(N,l({ref:t},c))}));function N(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[d]="string"==typeof e?e:i,l[1]=o;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/cosmic-cancer-gene-census",id:"version-3.21/data-sources/cosmic-cancer-gene-census",title:"cosmic-cancer-gene-census",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/cosmic-cancer-gene-census.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-cancer-gene-census",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cosmic-cancer-gene-census",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/cosmic-cancer-gene-census.md",tags:[],version:"3.21",frontMatter:{}},s=[],m={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},' {\n "name": "PRDM16",\n "hgncId": 14000,\n "ncbiGeneId": "63976",\n "ensemblGeneId": "ENSG00000142611",\n "cosmic": {\n "roleInCancer": [\n "oncogene",\n "fusion"\n ]\n 
}\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"roleInCancer"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Possible roles in caner")))))}d.isMDXComponent=!0},54163:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/cosmic-gene-fusion-json",id:"version-3.21/data-sources/cosmic-gene-fusion-json",title:"cosmic-gene-fusion-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/cosmic-gene-fusion-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-gene-fusion-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cosmic-gene-fusion-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/cosmic-gene-fusion-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],m={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},' "cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary gland (submandibular)",\n "numSamples":1\n },\n {\n "name":"salivary gland (parotid)",\n "numSamples":1\n },\n {\n 
"name":"salivary gland (nasal cavity)",\n "numSamples":1\n },\n {\n "name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 19841262\n ]\n }\n ]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Count")),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"name"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"description")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})))))}d.isMDXComponent=!0},43501:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/cosmic-json",id:"version-3.21/data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/cosmic-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],m={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "id":"COSV58272668",\n "numSamples":8,\n "refAllele":"-",\n "altAllele":"CCT",\n "histologies":[\n {\n "name":"carcinoma (serous carcinoma)",\n "numSamples":2\n },\n {\n "name":"meningioma (fibroblastic)",\n "numSamples":1\n },\n {\n "name":"carcinoma",\n 
"numSamples":1\n },\n {\n "name":"carcinoma (squamous cell carcinoma)",\n "numSamples":1\n },\n {\n "name":"meningioma (transitional)",\n "numSamples":1\n },\n {\n "name":"carcinoma (adenocarcinoma)",\n "numSamples":1\n },\n {\n "name":"other (neoplasm)",\n "numSamples":1\n }\n ],\n "sites":[\n {\n "name":"ovary",\n "numSamples":2\n },\n {\n "name":"meninges",\n "numSamples":2\n },\n {\n "name":"thyroid",\n "numSamples":2\n },\n {\n "name":"cervix",\n "numSamples":1\n },\n {\n "name":"large intestine (colon)",\n "numSamples":1\n }\n ],\n "pubMedIds":[\n 25738363,\n 27548314\n ],\n "confirmedSomatic":true,\n "drugResistance":true, /* not in this particular COSMIC variant */\n "isAlleleSpecific":true\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"COSMIC Genomic Mutation ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count 
array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"confirmedSomatic"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a confirmed somatic variant")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"drugResistance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the variant has been associated with drug resistance")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Count")),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"name"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"description")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})))))}d.isMDXComponent=!0},51417:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>m,default:()=>N,frontMatter:()=>s,metadata:()=>c,toc:()=>d});var a=n(87462),i=(n(67294),n(3905)),r=n(43501),l=n(54163),o=n(5033);const 
s={title:"COSMIC"},m=void 0,c={unversionedId:"data-sources/cosmic",id:"version-3.21/data-sources/cosmic",title:"COSMIC",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/cosmic.mdx",sourceDirName:"data-sources",slug:"/data-sources/cosmic",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cosmic",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/cosmic.mdx",tags:[],version:"3.21",frontMatter:{title:"COSMIC"},sidebar:"docs",previous:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clinvar"},next:{title:"DANN",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dann"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF extraction",id:"vcf-extraction",children:[{value:"Example",id:"example",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4}],level:3},{value:"TSV extraction",id:"tsv-extraction",children:[{value:"Example",id:"example-1",children:[],level:4},{value:"Parsing",id:"parsing-1",children:[],level:4},{value:"Parsing",id:"parsing-2",children:[],level:4},{value:"Aggregating Histologies & Sites",id:"aggregating-histologies--sites",children:[],level:4}],level:3},{value:"Download URL",id:"download-url",children:[{value:"GRCh37",id:"grch37",children:[],level:4},{value:"GRCh38",id:"grch38",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2},{value:"Gene Fusions",id:"gene-fusions",children:[{value:"TSV extraction",id:"tsv-extraction-1",children:[{value:"Example",id:"example-2",children:[],level:4},{value:"Parsing",id:"parsing-3",children:[],level:4},{value:"Parsing",id:"parsing-4",children:[],level:4},{value:"Aggregating Histologies & Sites",id:"aggregating-histologies--sites-1",children:[],level:4}],level:3},{value:"Known 
Issues",id:"known-issues",children:[],level:3},{value:"Download URL",id:"download-url-1",children:[{value:"GRCh37",id:"grch37-1",children:[],level:4},{value:"GRCh38",id:"grch38-1",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output-1",children:[],level:3}],level:2},{value:"Cancer Gene Census",id:"cancer-gene-census",children:[{value:"TSV Extraction",id:"tsv-extraction-2",children:[{value:"Example",id:"example-3",children:[],level:4},{value:"Parsing",id:"parsing-5",children:[{value:"Columns",id:"columns",children:[],level:5},{value:"Possible Roles in Cancer",id:"possible-roles-in-cancer",children:[],level:5},{value:"Parsing Stats",id:"parsing-stats",children:[],level:5}],level:4}],level:3},{value:"Known Issues",id:"known-issues-1",children:[],level:3},{value:"Download URL",id:"download-url-2",children:[],level:3},{value:"JSON output",id:"json-output-2",children:[],level:3}],level:2}],p={toc:d},u="wrapper";function N(e){let{components:t,...n}=e;return(0,i.kt)(u,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"COSMIC, the Catalogue of Somatic Mutations in Cancer, is the world's largest source of expert manually curated somatic mutation information relating to human\ncancers."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"John G Tate, Sally 
Bamford, Harry C Jubb, Zbyslaw Sondka, David M Beare, Nidhi Bindal, Harry Boutselakis, Charlotte G Cole, Celestino Creatore, Elisabeth Dawson,\nPeter Fish, Bhavana Harsha, Charlie Hathaway, Steve C Jupe, Chai Yin Kok, Kate Noble, Laura Ponting, Christopher C Ramshaw, Claire E Rye, Helen E Speedy, Ray\nStefancsik, Sam L Thompson, Shicai Wang, Sari Ward, Peter J Campbell, Simon A Forbes. (2019) ",(0,i.kt)("a",{parentName:"p",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"COSMIC: the Catalogue Of Somatic Mutations In\nCancer"),", ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", Volume 47, Issue D1"))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Licensed Content")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Commercial companies are required to ",(0,i.kt)("a",{parentName:"p",href:"https://cancer.sanger.ac.uk/cosmic/license"},"acquire a license from COSMIC"),". 
At the moment, this means that our COSMIC\ncontent is only available in Illumina's products and services, not in the open source distribution."),(0,i.kt)("p",{parentName:"div"},"Since many of you are academic users, we will enable a COSMIC login in our downloader later this year that will allow academic and commercial organizations (with\na license) access our COSMIC data sources."))),(0,i.kt)("h2",{id:"small-variants"},"Small Variants"),(0,i.kt)("p",null,"Our main COSMIC deliverable provides annotations for both coding and non-coding variants throughout the genome. As of COSMIC v96, this includes 28.7M variants\nspanning the human genome. Nirvana currently parses four files to extract the relevant content:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"CosmicCodingMuts.vcf.gz"),(0,i.kt)("li",{parentName:"ul"},"CosmicNonCodingVariants.vcf.gz"),(0,i.kt)("li",{parentName:"ul"},"CosmicMutantExport.tsv.gz"),(0,i.kt)("li",{parentName:"ul"},"CosmicNCV.tsv.gz")),(0,i.kt)("h3",{id:"vcf-extraction"},"VCF extraction"),(0,i.kt)("h4",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 65797 COSV58737189 T C . . 
GENE=OR4F5_ENST00000641515;STRAND=+;LEGACY_ID=COSN23957695;CDS=c.9+224T>C;AA=p.?;HGVSC=ENST00000641515.2:c.9+224T>C;HGVSG=1:g.65797T>C;CNT=1\n")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the VCF files, we're mainly interested in the following columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"CHROM")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"POS")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"REF")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ALT"))),(0,i.kt)("h3",{id:"tsv-extraction"},"TSV extraction"),(0,i.kt)("h4",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"Gene name Accession Number Gene CDS length HGNC ID Sample name ID_sample ID_tumour Primary site Site subtype 1 Site subtype 2 Site subtype 3 Primary histology Histology subtype 1 Histology subtype 2 Histology subtype 3 Genome-wide screen GENOMIC_MUTATION_ID LEGACY_MUTATION_ID MUTATION_ID Mutation CDS Mutation AA Mutation Description Mutation zygosity LOH GRCh Mutation genome position Mutation strand Resistance Mutation Mutation somatic status Pubmed_PMID ID_STUDY Sample Type Tumour origin Age HGVSP HGVSC HGVSG\nMCF2L_ENST00000375604 ENST00000375604.6 3372 14576 RK091_C01 1918867 1806188 liver NS NS NS carcinoma NS NS NS y COSV65049364 COSN1601909 113108365 c.73+3096A>G p.? 
Unknown het 38 13:113005079-113005079 + - Variant of unknown origin 322 fresh/frozen - NOS primary ENST00000375604.6:c.73+3096A>G 13:g.113005079A>G\n")),(0,i.kt)("h4",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"From the TSV file, we're mainly interested in the following columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"GENOMIC_MUTATION_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ID_sample")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Primary site")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Site subtype 1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Primary histology")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Histology subtype 1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Pubmed_PMID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Resistance Mutation")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"Mutation somatic status"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"For all the histologies and sites, we replace all the underlines with spaces. 
",(0,i.kt)("inlineCode",{parentName:"p"},"salivary_gland")," would become ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary gland"),"."))),(0,i.kt)("h4",{id:"parsing-2"},"Parsing"),(0,i.kt)("p",null,"To aggregate the data in Nirvana, we perform the following:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Parse the coding and non-coding TSV files to retrieve the histologies, sites, PubMed IDs, somatic status, and resistance mutation status. Histologies and sites\nare tracked with respect to sample IDs."),(0,i.kt)("li",{parentName:"ul"},"Parse the coding and non-coding VCF files to retrieve the genomic variant for each entry")),(0,i.kt)("h4",{id:"aggregating-histologies--sites"},"Aggregating Histologies & Sites"),(0,i.kt)("p",null,"For sites and histologies, we observe that the subtype provides additional description but is still dependent on the primary site value. For example, the primary\nsite might be ",(0,i.kt)("inlineCode",{parentName:"p"},"skin"),", but the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"foot"),". Therefore, we will combine the values in the following manner: ",(0,i.kt)("inlineCode",{parentName:"p"},"skin (foot)"),". "),(0,i.kt)("p",null,"COSMIC uses ",(0,i.kt)("inlineCode",{parentName:"p"},"NS")," to show that a value is empty. 
If the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"NS"),", we will use the primary histology instead."),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("h4",{id:"grch37"},"GRCh37"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/VCF/CosmicCodingMuts.vcf.gz"},"CosmicCodingMuts.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/VCF/CosmicNonCodingVariants.vcf.gz"},"CosmicNonCodingVariants.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/CosmicMutantExport.tsv.gz"},"CosmicMutantExport.tsv.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/CosmicNCV.tsv.gz"},"CosmicNCV.tsv.gz"))),(0,i.kt)("h4",{id:"grch38"},"GRCh38"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/VCF/CosmicCodingMuts.vcf.gz"},"CosmicCodingMuts.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/VCF/CosmicNonCodingVariants.vcf.gz"},"CosmicNonCodingVariants.vcf.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/CosmicMutantExport.tsv.gz"},"CosmicMutantExport.tsv.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/CosmicNCV.tsv.gz"},"CosmicNCV.tsv.gz"))),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"SmallVariantJSON"}),(0,i.kt)("h2",{id:"gene-fusions"},"Gene Fusions"),(0,i.kt)("p",null,"Gene fusions are 
manually curated from peer reviewed publications by expert COSMIC curators. A comprehensive literature curation is completed for each fusion\npair when it is released in the database. Currently COSMIC includes information on fusions involved in solid tumours and leukaemias."),(0,i.kt)("h3",{id:"tsv-extraction-1"},"TSV extraction"),(0,i.kt)("h4",{id:"example-2"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"SAMPLE_ID SAMPLE_NAME PRIMARY_SITE SITE_SUBTYPE_1 SITE_SUBTYPE_2 SITE_SUBTYPE_3 PRIMARY_HISTOLOGY HISTOLOGY_SUBTYPE_1 HISTOLOGY_SUBTYPE_2 HISTOLOGY_SUBTYPE_3 FUSION_ID TRANSLOCATION_NAME 5'_CHROMOSOME 5'_STRAND 5'_GENE_ID 5'_GENE_NAME 5'_LAST_OBSERVED_EXON 5'_GENOME_START_FROM 5'_GENOME_START_TO 5'_GENOME_STOP_FROM 5'_GENOME_STOP_TO 3'_CHROMOSOME 3'_STRAND 3'_GENE_ID 3'_GENE_NAME 3'_FIRST_OBSERVED_EXON 3'_GENOME_START_FROM 3'_GENOME_START_TO 3'_GENOME_STOP_FROM 3'_GENOME_STOP_TO FUSION_TYPE PUBMED_PMID\n749711 HCC1187 breast NS NS NS carcinoma ductal_carcinoma NS NS 665 ENST00000360863.10(RGS22):r.1_3555::ENST00000369518.1(SYCP1):r.2100_3452 8 - 197199 RGS22 22 99981937 99981937 100106116 100106116 1 + 212470 SYCP1_ENST00000369518 24 114944339 114944339 114995367 114995367 Inferred Breakpoint 20033038\n")),(0,i.kt)("h4",{id:"parsing-3"},"Parsing"),(0,i.kt)("p",null,"From the TSV file, we're mainly interested in the following 
columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"SAMPLE_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_SITE")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_HISTOLOGY")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"HISTOLOGY_SUBTYPE_1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"FUSION_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"TRANSLOCATION_NAME")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PUBMED_PMID"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"For all the histologies and sites, we replace all the underlines with spaces. 
",(0,i.kt)("inlineCode",{parentName:"p"},"salivary_gland")," would become ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary gland"),"."))),(0,i.kt)("h4",{id:"parsing-4"},"Parsing"),(0,i.kt)("p",null,"To create the gene fusion entries in Nirvana, we perform the following on each row in the TSV file:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Group all entries by FUSION_ID"),(0,i.kt)("li",{parentName:"ul"},"Using all the entries related to this FUSION_ID:",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"Collect all the PubMed IDs"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of observed sample IDs"),(0,i.kt)("li",{parentName:"ul"},"Grab the HGVS r. notation (should not change throughout the FUSION_ID)"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each histology"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each site"))),(0,i.kt)("li",{parentName:"ul"},"Extract the transcript IDs from the HGVS notation and lookup the associated gene symbols")),(0,i.kt)("h4",{id:"aggregating-histologies--sites-1"},"Aggregating Histologies & Sites"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"#aggregating-histologies--sites"},"Aggregating Histologies & Sites")," was previously described in the small variants section."),(0,i.kt)("h3",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 
11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"There are some issues with the HGVS RNA notation:"),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusions.")))),(0,i.kt)("h3",{id:"download-url-1"},"Download URL"),(0,i.kt)("h4",{id:"grch37-1"},"GRCh37"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v96/CosmicFusionExport.tsv.gz"},"CosmicFusionExport.tsv.gz"))),(0,i.kt)("h4",{id:"grch38-1"},"GRCh38"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v96/CosmicFusionExport.tsv.gz"},"CosmicFusionExport.tsv.gz"))),(0,i.kt)("h3",{id:"json-output-1"},"JSON Output"),(0,i.kt)(l.default,{mdxType:"GeneFusionJSON"}),(0,i.kt)("h2",{id:"cancer-gene-census"},"Cancer Gene Census"),(0,i.kt)("h3",{id:"tsv-extraction-2"},"TSV Extraction"),(0,i.kt)("h4",{id:"example-3"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"GENE_NAME CELL_TYPE PUBMED_PMID HALLMARK IMPACT DESCRIPTION CELL_LINE\nPRDM16 18496560 role in cancer oncogene oncogene\nPRDM16 16015645 role in cancer fusion fusion\n")),(0,i.kt)("h4",{id:"parsing-5"},"Parsing"),(0,i.kt)("p",null,'To extract information about TSGs and oncogenes, the data based on the "role in cancer" attribute is filtered.\nFor tumor suppressor genes, rows with the value "TSG" and for oncogenes, rows with the value "oncogene" are filtered.\nSome genes have both "TSG/oncogene" as their role, which indicates that they can act as both.'),(0,i.kt)("h5",{id:"columns"},"Columns"),(0,i.kt)("p",null,"Only following columns are needed to 
gather required roles in cancer:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"GENE_NAME")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"IMPACT")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"HALLMARK"))),(0,i.kt)("h5",{id:"possible-roles-in-cancer"},"Possible Roles in Cancer"),(0,i.kt)("p",null,"While parsing, only following roles in cancer are found:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"fusion")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"TSG")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"oncogene"))),(0,i.kt)("h5",{id:"parsing-stats"},"Parsing Stats"),(0,i.kt)("p",null,"The file contained following number of instances for each role type"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Role in cancer"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Total Instances"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"fusion"),(0,i.kt)("td",{parentName:"tr",align:"center"},"149")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"TSG"),(0,i.kt)("td",{parentName:"tr",align:"center"},"195")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"oncogene"),(0,i.kt)("td",{parentName:"tr",align:"center"},"181")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"Total"),(0,i.kt)("td",{parentName:"tr",align:"center"},"525")))),(0,i.kt)("h3",{id:"known-issues-1"},"Known Issues"),(0,i.kt)("p",null,"None"),(0,i.kt)("h3",{id:"download-url-2"},"Download 
URL"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v97/Cancer_Gene_Census_Hallmarks_Of_Cancer.tsv.gz"},"Cancer_Gene_Census_Hallmarks_Of_Cancer.tsv.gz"))),(0,i.kt)("h3",{id:"json-output-2"},"JSON output"),(0,i.kt)(o.default,{mdxType:"CancerGeneCensusJSON"}))}N.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/23e63ba2.b3d85a61.js b/assets/js/23e63ba2.b3d85a61.js deleted file mode 100644 index 4e98743f..00000000 --- a/assets/js/23e63ba2.b3d85a61.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[392],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function a(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,o=e.mdxType,a=e.originalType,i=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=o,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||a;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var 
n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var a=n.length,l=new Array(a);l[0]=d;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:o,l[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>a,metadata:()=>c,toc:()=>i});var r=n(87462),o=(n(67294),n(3905));const a={},l=void 0,c={unversionedId:"data-sources/phylop-json",id:"version-3.21/data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/phylop-json.md",tags:[],version:"3.21",frontMatter:{}},i=[],p={toc:i},s="wrapper";function u(e){let{components:t,...n}=e;return(0,o.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] \n')),(0,o.kt)("table",null,(0,o.kt)("thead",{parentName:"table"},(0,o.kt)("tr",{parentName:"thead"},(0,o.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,o.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,o.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,o.kt)("tbody",{parentName:"table"},(0,o.kt)("tr",{parentName:"tbody"},(0,o.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,o.kt)("td",{parentName:"tr",align:"center"},"float"),(0,o.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/24b84a7a.61e8d75d.js b/assets/js/24b84a7a.61e8d75d.js deleted file 
mode 100644 index d5e7145a..00000000 --- a/assets/js/24b84a7a.61e8d75d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5269,8113],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),m=d(n),u=l,v=m["".concat(s,".").concat(u)]||m[u]||p[u]||r;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,i=new Array(r);i[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:l,i[1]=o;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.18/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.18/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/dbsnp-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],d={toc:s},c="wrapper";function m(e){let{components:t,...n}=e;return(0,l.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,l.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,l.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,l.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}m.isMDXComponent=!0},49321:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),l=(n(67294),n(3905)),r=n(54909);const i={title:"dbSNP"},o=void 
0,s={unversionedId:"data-sources/dbsnp",id:"version-3.18/data-sources/dbsnp",title:"dbSNP",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/dbsnp.mdx",sourceDirName:"data-sources",slug:"/data-sources/dbsnp",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dbsnp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/dbsnp.mdx",tags:[],version:"3.18",frontMatter:{title:"dbSNP"},sidebar:"docs",previous:{title:"DANN",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dann"},next:{title:"DECIPHER",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/decipher"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Global allele extraction",id:"global-allele-extraction",children:[],level:4},{value:"Equal Allele Frequency Example (2 alleles)",id:"equal-allele-frequency-example-2-alleles",children:[],level:4},{value:"Equal Allele Frequency Example (3 alleles)",id:"equal-allele-frequency-example-3-alleles",children:[],level:4},{value:"Equal Allele Frequency in Alternate Alleles",id:"equal-allele-frequency-in-alternate-alleles",children:[],level:4},{value:"Equal Allele Frequency Between Reference & Alternate Allele",id:"equal-allele-frequency-between-reference--alternate-allele",children:[],level:4}],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},m="wrapper";function p(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"dbSNP contains human single nucleotide variations, microsatellites, and small-scale 
insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP\u2014Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. ",(0,l.kt)("em",{parentName:"p"},"Genome Res."),", ",(0,l.kt)("strong",{parentName:"p"},"9"),", 677\u2013679."))),(0,l.kt)("h2",{id:"vcf-file"},"VCF File"),(0,l.kt)("h3",{id:"example"},"Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 10177 rs367896724 A AC . . 
RS=367896724;RSPOS=10177;dbSNPBuildID=138; \\ \n SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \\\n VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \\\n TOPMED=0.76728147298674821,0.23271852701325178\n")),(0,l.kt)("h3",{id:"parsing"},"Parsing"),(0,l.kt)("p",null,"From the VCF file, we're mainly interested in the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"rsID")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"ID")," field"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"CAF")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"INFO")," field")),(0,l.kt)("h4",{id:"global-allele-extraction"},"Global allele extraction"),(0,l.kt)("p",null,"The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values). 
"),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: Global Major Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele."))),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: 
Global Minor Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily."))),(0,l.kt)("h4",{id:"equal-allele-frequency-example-2-alleles"},"Equal Allele Frequency Example (2 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C CAF=0.5,0.5\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and C to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-example-3-alleles"},"Equal Allele Frequency Example (3 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.33,0.33,0.33\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-in-alternate-alleles"},"Equal Allele Frequency in Alternate Alleles"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.4,0.4\n")),(0,l.kt)("p",null,"We will select C or T to be arbitrarily assigned to be the global major or global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-between-reference--alternate-allele"},"Equal Allele Frequency Between Reference & Alternate Allele"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.2,0.6\n")),(0,l.kt)("p",null,"We will select T to be the global major allele and C to be the global minor allele."),(0,l.kt)("h2",{id:"known-issues"},"Known Issues"),(0,l.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are multiple entries with different CAF values for the same allele, we use the first CAF value."))),(0,l.kt)("h2",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nih.gov/snp/organisms/"},"https://ftp.ncbi.nih.gov/snp/organisms/")),(0,l.kt)("h2",{id:"json-output"},"JSON Output"),(0,l.kt)(r.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/24c525b2.0ece7950.js b/assets/js/24c525b2.0ece7950.js deleted file mode 100644 index 8177b3dc..00000000 --- a/assets/js/24c525b2.0ece7950.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1153],{3905:(M,L,t)=>{t.d(L,{Zo:()=>o,kt:()=>C});var i=t(67294);function j(M,L,t){return L in M?Object.defineProperty(M,L,{value:t,enumerable:!0,configurable:!0,writable:!0}):M[L]=t,M}function e(M,L){var t=Object.keys(M);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(M);L&&(i=i.filter((function(L){return Object.getOwnPropertyDescriptor(M,L).enumerable}))),t.push.apply(t,i)}return t}function u(M){for(var L=1;L=0||(j[t]=M[t]);return j}(M,L);if(Object.getOwnPropertySymbols){var 
e=Object.getOwnPropertySymbols(M);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(M,t)&&(j[t]=M[t])}return j}var a=i.createContext({}),n=function(M){var L=i.useContext(a),t=L;return M&&(t="function"==typeof M?M(L):u(u({},L),M)),t},o=function(M){var L=n(M.components);return i.createElement(a.Provider,{value:L},M.children)},s="mdxType",w={inlineCode:"code",wrapper:function(M){var L=M.children;return i.createElement(i.Fragment,{},L)}},y=i.forwardRef((function(M,L){var t=M.components,j=M.mdxType,e=M.originalType,a=M.parentName,o=N(M,["components","mdxType","originalType","parentName"]),s=n(t),y=j,C=s["".concat(a,".").concat(y)]||s[y]||w[y]||e;return t?i.createElement(C,u(u({ref:L},o),{},{components:t})):i.createElement(C,u({ref:L},o))}));function C(M,L){var t=arguments,j=L&&L.mdxType;if("string"==typeof M||j){var e=t.length,u=new Array(e);u[0]=y;var N={};for(var a in L)hasOwnProperty.call(L,a)&&(N[a]=L[a]);N.originalType=M,N[s]="string"==typeof M?M:j,u[1]=N;for(var n=2;n{t.d(L,{Z:()=>j});var i=t(67294);function j(M){let{className:L,name:t,children:j,githubUrl:e,twitterUrl:u}=M;return i.createElement("div",{className:L},i.createElement("div",{className:"card card--full-height"},i.createElement("div",{className:"card__header"},i.createElement("div",{className:"avatar avatar--vertical"},i.createElement("img",{className:"avatar__photo avatar__photo--xl",src:e+".png"}),i.createElement("div",{className:"avatar__intro"},i.createElement("h3",{className:"avatar__name"},t)))),i.createElement("div",{className:"card__body"},j),i.createElement("div",{className:"card__footer"},i.createElement("div",{className:"button-group button-group--block"},e&&i.createElement("a",{className:"button button--secondary",href:e},"GitHub"),u&&i.createElement("a",{className:"button button--secondary",href:u},"Twitter")))))}},91646:(M,L,t)=>{t.r(L),t.d(L,{TeamProfileCardCol:()=>o,contentTitle:()=>N,default:()=>y,frontMatter:()=>u,metadata:()=>a,toc:()=>n});var 
i=t(87462),j=(t(67294),t(3905)),e=t(63427);const u={id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},N=void 0,a={unversionedId:"introduction/introduction",id:"version-3.14/introduction/introduction",title:"Introduction",description:"Clinical-grade variant annotation",source:"@site/versioned_docs/version-3.14/introduction/introduction.mdx",sourceDirName:"introduction",slug:"/",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/introduction/introduction.mdx",tags:[],version:"3.14",frontMatter:{id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},sidebar:"version-3.14/docs",next:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/dependencies"}},n=[{value:"What does Nirvana annotate?",id:"what-does-nirvana-annotate",children:[],level:2},{value:"Licensing",id:"licensing",children:[{value:"Code",id:"code",children:[],level:3},{value:"Data",id:"data",children:[],level:3}],level:2},{value:"Nirvana Team",id:"nirvana-team",children:[{value:"Active Team",id:"active-team",children:[],level:3},{value:"Honorary Alumni",id:"honorary-alumni",children:[],level:3}],level:2}];function o(M){return(0,j.kt)(e.Z,(0,i.Z)({},M,{className:"col col--6 margin-bottom--lg",mdxType:"TeamProfileCard"}))}const s={toc:n,TeamProfileCardCol:o},w="wrapper";function y(M){let{components:L,...e}=M;return(0,j.kt)(w,(0,i.Z)({},s,e,{components:L,mdxType:"MDXLayout"}),(0,j.kt)("p",null,(0,j.kt)("img",{src:t(40923).Z})),(0,j.kt)("p",null,"Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs (including CNVs). 
It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation."),(0,j.kt)("p",null,"The input to Nirvana are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Nirvana handles multiple alternate alleles and multiple samples with ease."),(0,j.kt)("p",null,"The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily."),(0,j.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,j.kt)("div",{parentName:"div",className:"admonition-heading"},(0,j.kt)("h5",{parentName:"div"},(0,j.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,j.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,j.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Fun Fact")),(0,j.kt)("div",{parentName:"div",className:"admonition-content"},(0,j.kt)("p",{parentName:"div"},"Nirvana is a backronym for ",(0,j.kt)("strong",{parentName:"p"},"NI"),"mble and ",(0,j.kt)("strong",{parentName:"p"},"R"),"obust ",(0,j.kt)("strong",{parentName:"p"},"VA"),"riant 
a",(0,j.kt)("strong",{parentName:"p"},"N"),"not",(0,j.kt)("strong",{parentName:"p"},"A"),"tor"))),(0,j.kt)("h2",{id:"what-does-nirvana-annotate"},"What does Nirvana annotate?"),(0,j.kt)("p",null,"We use Sequence Ontology consequences to describe how each variant impacts a given transcript:"),(0,j.kt)("p",null,(0,j.kt)("img",{src:t(12653).Z})),(0,j.kt)("p",null,"In addition, we also use external data sources to provide additional context for each variant:"),(0,j.kt)("p",null,(0,j.kt)("img",{src:t(71631).Z})),(0,j.kt)("h2",{id:"licensing"},"Licensing"),(0,j.kt)("h3",{id:"code"},"Code"),(0,j.kt)("p",null,"Nirvana source code is provided under the ",(0,j.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/blob/develop/LICENSE"},"GPLv3")," license. Nirvana includes several third party packages provided under other open source licenses, please see ",(0,j.kt)("a",{parentName:"p",href:"introduction/dependencies"},"Dependencies")," for additional details."),(0,j.kt)("h3",{id:"data"},"Data"),(0,j.kt)("p",null,"The data used by Nirvana is publicly available, however some data sources have special restrictions on use by non-academic entities."),(0,j.kt)("h2",{id:"nirvana-team"},"Nirvana Team"),(0,j.kt)("h3",{id:"active-team"},"Active Team"),(0,j.kt)("p",null,"The Nirvana team works on the core functionality, AWS annotation services, in addition to keeping the annotation data sources up-to-date."),(0,j.kt)("p",null,"Current members of the Nirvana team are listed in alphabetical order below."),(0,j.kt)("div",{className:"row"},(0,j.kt)(o,{name:"Haochen Li",githubUrl:"https://github.com/haochenl",mdxType:"TeamProfileCardCol"},"Active developer. 
Detail-oriented quick thinker that keeps cool even in the most stressful situations."),(0,j.kt)(o,{name:"Michael Str\xf6mberg",githubUrl:"https://github.com/MichaelStromberg",mdxType:"TeamProfileCardCol"},"Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it."),(0,j.kt)(o,{name:"Rajat Shuvro Roy",githubUrl:"https://github.com/rajatshuvro",mdxType:"TeamProfileCardCol"},"Lead developer. Loves to speed up things and make services available to all interested users.")),(0,j.kt)("h3",{id:"honorary-alumni"},"Honorary Alumni"),(0,j.kt)("p",null,"Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things."),(0,j.kt)("div",{className:"row"},(0,j.kt)(o,{name:"Julien Lajugie",githubUrl:"https://github.com/JulienLajugie",mdxType:"TeamProfileCardCol"},"Julien is a legend around these parts. When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place."),(0,j.kt)(o,{name:"Shuli Kang",githubUrl:"https://github.com/shulik7",mdxType:"TeamProfileCardCol"},"Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies."),(0,j.kt)(o,{name:"Yu Jiang",githubUrl:"https://github.com/yujiang02",mdxType:"TeamProfileCardCol"},"Biostatistics genius from Duke University before joining our team at Illumina. 
Now working as a Research Engineer at Facebook AI Research.")))}y.isMDXComponent=!0},40923:(M,L,t)=>{t.d(L,{Z:()=>i});const i=""},71631:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/SupplementaryAnnotations-d43d3f1c837f9b80fab530432e0e4b1d.svg"},12653:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/TranscriptConsequences-60ca1c43a36dacf896fecdabf09ce02c.svg"}}]); \ No newline at end of file diff --git a/assets/js/25512a56.da03206a.js b/assets/js/25512a56.da03206a.js deleted file mode 100644 index 546d3ae7..00000000 --- a/assets/js/25512a56.da03206a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8808],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},m=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,c=i(t,["components","mdxType","originalType","parentName"]),s=u(n),m=r,g=s["".concat(p,".").concat(m)]||s[m]||d[m]||l;return n?a.createElement(g,o(o({ref:e},c),{},{components:n})):a.createElement(g,o({ref:e},c))}));function g(t,e){var 
n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=m;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/clingen-json",id:"version-3.16/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clingen-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],u={toc:p},c="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(c,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. 
Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/25773e15.f40c04ab.js b/assets/js/25773e15.f40c04ab.js deleted file mode 100644 index 7865280a..00000000 --- a/assets/js/25773e15.f40c04ab.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3334],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>k});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},d="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),d=s(n),g=r,k=d["".concat(p,".").concat(g)]||d[g]||c[g]||i;return n?a.createElement(k,o(o({ref:t},m),{},{components:n})):a.createElement(k,o({ref:t},m))}));function k(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=g;var l={};for(var p in t)hasOwnProperty.call(t,p)&&(l[p]=t[p]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const i={title:"MNV Recomposition"},o=void 0,l={unversionedId:"core-functionality/mnv-recomposition",id:"version-3.18/core-functionality/mnv-recomposition",title:"MNV Recomposition",description:"Overview",source:"@site/versioned_docs/version-3.18/core-functionality/mnv-recomposition.md",sourceDirName:"core-functionality",slug:"/core-functionality/mnv-recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/mnv-recomposition",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/core-functionality/mnv-recomposition.md",tags:[],version:"3.18",frontMatter:{title:"MNV Recomposition"},sidebar:"docs",previous:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/gene-fusions"},next:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/variant-ids"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"Criteria",id:"criteria",children:[],level:2},{value:"Examples",id:"examples",children:[{value:"Multiple Samples",id:"multiple-samples",children:[],level:3},{value:"Phase Sets",id:"phase-sets",children:[{value:"Homozygous variants, same phase set",id:"homozygous-variants-same-phase-set",children:[],level:4},{value:"Mixing phased and 
unphased variants",id:"mixing-phased-and-unphased-variants",children:[],level:4},{value:"Variants in different phase sets",id:"variants-in-different-phase-sets",children:[],level:4},{value:"Unphased homozygous variants",id:"unphased-homozygous-variants",children:[],level:4},{value:"Homozygous variants are not commutative",id:"homozygous-variants-are-not-commutative",children:[],level:4}],level:3},{value:"Conflicting Genotypes",id:"conflicting-genotypes",children:[],level:3}],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],s={toc:p},m="wrapper";function d(e){let{components:t,...i}=e;return(0,r.kt)(m,(0,a.Z)({},s,i,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Most annotation tools handle variants independently. The problem with this approach is that nearby variants could affect the same codon leading to a very different annotation. For example, consider the following example (Danecek, 2017):"),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(71481).Z})),(0,r.kt)("p",null,"When handled independently, the two variants (C\u2192T & G\u2192A) would be annotated as missense annotations. However, if we consider them together, the resulting MNV would yield a stop gain."),(0,r.kt)("p",null,"By default, Nirvana identifies these types of cases where two or more SNVs would affect the same codon. 
In addition, it's able to perform this operation on VCFs containing large numbers of samples (we've tested this on 2,500+ samples using the 1000 Genomes Project VCF files)."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Petr Danecek, Shane A McCarthy, ",(0,r.kt)("a",{parentName:"p",href:"https://academic.oup.com/bioinformatics/article-abstract/33/13/2037/3000373"},"BCFtools/csq: haplotype-aware variant consequences"),", Bioinformatics, Volume 33, Issue 13, 1 July 2017, Pages 2037\u20132039"))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Supported variant types")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"At the moment, 
",(0,r.kt)("strong",{parentName:"p"},"Nirvana only supports recomposing multiple SNVs into an MNV"),". The Danecek paper makes a compelling case for supporting frameshifting variants paired with frame-restoring variants. We've also received requests for supporting the recomposition of an SNV with insertions and deletions. While this is something we've looked into, it represents functionality that many of our clinical customers are not yet comfortable with."))),(0,r.kt)("h2",{id:"criteria"},"Criteria"),(0,r.kt)("p",null,"Nirvana will recompose a set of SNVs if two or more SNVs are located in the same codon for any codon in any of the overlapping transcripts."),(0,r.kt)("p",null,"The following criteria must also be met for at least one sample:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"Genotypes are provided for the VCF variants and all variants are in phase or homozygous variant."),(0,r.kt)("li",{parentName:"ol"},"All the available phase set IDs are the same (homozygous variants are available to all phase sets)"),(0,r.kt)("li",{parentName:"ol"},"The genotype ploidy for all the variants are the same."),(0,r.kt)("li",{parentName:"ol"},"No unsupported variant type (i.e. insertion or deletion) overlaps the recomposed variants"),(0,r.kt)("li",{parentName:"ol"},"The first and last base in at least one of the recomposed alleles must be non-reference.")),(0,r.kt)("h2",{id:"examples"},"Examples"),(0,r.kt)("p",null,"During variant recomposition, if two SNVs affect the same codon, it becomes the seed codon. If there are SNVs in the adjacent codons, they will be aggregated into the seed codon."),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Three SNVs in two adjacent codons. The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"ATAG"),":\n",(0,r.kt)("img",{src:n(89698).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Three SNVs in two adjacent codons (larger distance). 
The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"ATATCC"),":\n",(0,r.kt)("img",{src:n(2736).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Nirvana can use ",(0,r.kt)("strong",{parentName:"p"},"multiple reading frames")," to aggregate the seed codon. In this example, the seed codon is highlighted in green. If we look at reading frame 1, we see that the T\u2192A variant occurs in the ",(0,r.kt)("inlineCode",{parentName:"p"},"ACT")," codon. The adjacent codon to the left also has a variant C\u2192T. As a result, there can be up to four bases between SNVs when aggregating the flanking codons. The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"TTCACATAGCACTCAC"),":\n",(0,r.kt)("img",{src:n(95903).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Nothing will be recomposed if there's no seed codon:\n",(0,r.kt)("img",{src:n(22159).Z})))),(0,r.kt)("h3",{id:"multiple-samples"},"Multiple Samples"),(0,r.kt)("p",null,"Recomposing variants while handling multiple samples can be complex. The recomposition criteria described above often leads to sample-specific recomposed variants. 
Here we show the recomposition of three variants with sample-specific criteria marked in bold:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 1"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 2"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 3"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"td"},"0/1")),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 
3"),(0,r.kt)("td",{parentName:"tr",align:"center"},"102"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"td"},".")),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG, CG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"ACT"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CCT, CCA"),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2")))),(0,r.kt)("p",null,"In the example above, the heterozygous genotype in sample 1 at position 101 would prevent the MNVs from being recomposed. 
Similarly, the unknown genotype for sample 2 at position 102 would produce a smaller MNV than the one expressed for sample 3."),(0,r.kt)("h3",{id:"phase-sets"},"Phase Sets"),(0,r.kt)("h4",{id:"homozygous-variants-same-phase-set"},"Homozygous variants, same phase set"),(0,r.kt)("p",null,"Recomposed phase set becomes ",(0,r.kt)("inlineCode",{parentName:"p"},".")," since homozygous variants belong to all phase sets."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"mixing-phased-and-unphased-variants"},"Mixing phased and unphased variants"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG,TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")))),(0,r.kt)("h4",{id:"variants-in-different-phase-sets"},"Variants in different phase sets"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG,TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"unphased-homozygous-variants"},"Unphased homozygous variants"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"homozygous-variants-are-not-commutative"},"Homozygous variants are not commutative"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 3"),(0,r.kt)("td",{parentName:"tr",align:"center"},"102"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")))),(0,r.kt)("p",null,"In 
this example, the homozygous variant at position 101 cannot bridge the gap between other two variants since there could be a switching error between phase sets 567 & 890. As a result, we have to create two overlapping MNVs:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG, TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GG, GT"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")))),(0,r.kt)("h3",{id:"conflicting-genotypes"},"Conflicting Genotypes"),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Given the following VCF entries:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3\nchr1 12861477 . T C . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477\nchr1 12861478 . G A . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477\n")),(0,r.kt)("p",null,"Each original variant would be annotated as usual. 
The difference is that both will now have a ",(0,r.kt)("inlineCode",{parentName:"p"},"isDecomposedVariant")," flag set to true in addition to an entry in the ",(0,r.kt)("inlineCode",{parentName:"p"},"linkedVids")," field that points to the new MNV:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json",metastring:"{31-34,70-73}","{31-34,70-73}":!0},'{\n "chromosome":"chr1",\n "position":12861477,\n "refAllele":"T",\n "altAlleles":[\n "C"\n ],\n "filters":[\n "PASS"\n ],\n "samples":[\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0|1",\n }\n ],\n "variants":[\n {\n "vid":"1-12861477-T-C",\n "chromosome":"chr1",\n "begin":12861477,\n "end":12861477,\n "refAllele":"T",\n "altAllele":"C",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "linkedVids":[\n "1-12861477-TG-CA"\n ],\n "hgvsg":"NC_000001.11:g.12861477T>C",\n "transcripts":[ ... ]\n }\n ]\n},\n{\n "chromosome":"chr1",\n "position":12861478,\n "refAllele":"G",\n "altAlleles":[\n "A"\n ],\n "filters":[\n "PASS"\n ],\n "samples":[\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0|1",\n }\n ],\n "variants":[\n {\n "vid":"1-12861478-G-A",\n "chromosome":"chr1",\n "begin":12861478,\n "end":12861478,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "linkedVids":[\n "1-12861477-TG-CA"\n ],\n "hgvsg":"NC_000001.11:g.12861478G>A",\n "transcripts":[ ... 
]\n }\n ]\n}\n')),(0,r.kt)("p",null,"The recomposed variant gets a separate entry where the ",(0,r.kt)("inlineCode",{parentName:"p"},"isRecomposedVariant")," flag is set to true and the ",(0,r.kt)("inlineCode",{parentName:"p"},"linkedVids")," field links to the constituent SNVs:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json",metastring:"{32-36}","{32-36}":!0},' {\n "chromosome": "chr1",\n "position": 12861477,\n "refAllele": "TG",\n "altAlleles": [\n "CA"\n ],\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "1p36.21",\n "samples": [\n {\n "genotype": "0|0"\n },\n {\n "genotype": "0|0"\n },\n {\n "genotype": "0|1"\n }\n ],\n "variants": [\n {\n "vid": "1-12861477-TG-CA",\n "chromosome": "chr1",\n "begin": 12861477,\n "end": 12861478,\n "refAllele": "TG",\n "altAllele": "CA",\n "variantType": "MNV",\n "isRecomposedVariant": true,\n "linkedVids": [\n "1-12861477-T-C",\n "1-12861478-G-A"\n ],\n "hgvsg": "NC_000001.11:g.12861477_12861478inv",\n "transcripts":[ ... ]\n ]\n }\n ]\n },\n')),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Recomposed QUAL, FILTER, and GQ")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Although the example above does not demonstrate it, Nirvana tries to set the quality score, filter, and genotype quality (GQ) for the recomposed variant. 
The QUAL score is calculated to be the ",(0,r.kt)("strong",{parentName:"p"},"minimum")," QUAL score for all the constituent SNVs. The same method is used for the genotype quality (GQ) scores. For the ",(0,r.kt)("inlineCode",{parentName:"p"},"filters")," field, ",(0,r.kt)("inlineCode",{parentName:"p"},"PASS")," will be used if all constituent variants passed their filters, otherwise we set it to ",(0,r.kt)("inlineCode",{parentName:"p"},"FilteredVariantsRecomposed"),"."))))}d.isMDXComponent=!0},71481:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/BCFtools-csq-fig1a-a266b0be1c6d74f085fcacb2f433f750.png"},95903:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/multiple-reading-frames-19e896fe74a8781afdd1fa2539edff88.png"},22159:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/no-recomposition-b63eb855b0ed62b8ae331eafc538223d.png"},2736:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/three-SNVs-larger-separation-85b12d5bafd32ee312103a1b9b588720.png"},89698:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/three-SNVs-two-codons-bc45a465809b53d51dbfb32deaa6324a.png"}}]); \ No newline at end of file diff --git a/assets/js/25df2835.8922ed1d.js b/assets/js/25df2835.8922ed1d.js deleted file mode 100644 index 9bbf584a..00000000 --- a/assets/js/25df2835.8922ed1d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2135,3966,4291],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>N});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var 
l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},u=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},c=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,p=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),m=s(n),c=r,N=m["".concat(p,".").concat(c)]||m[c]||d[c]||l;return n?a.createElement(N,o(o({ref:t},u),{},{components:n})):a.createElement(N,o({ref:t},u))}));function N(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[m]="string"==typeof e?e:r,o[1]=i;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.17/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n 
"afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}m.isMDXComponent=!0},96351:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.17/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}m.isMDXComponent=!0},663:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>p,default:()=>c,frontMatter:()=>i,metadata:()=>s,toc:()=>u});var a=n(87462),r=(n(67294),n(3905)),l=n(84517),o=n(96351);const i={title:"1000 Genomes"},p=void 0,s={unversionedId:"data-sources/1000Genomes",id:"version-3.17/data-sources/1000Genomes",title:"1000 Genomes",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/1000Genomes.mdx",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/1000Genomes",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/1000Genomes.mdx",tags:[],version:"3.17",frontMatter:{title:"1000 Genomes"},sidebar:"version-3.17/docs",previous:{title:"Annotating COVID-19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/covid19"},next:{title:"Amino Acid Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/amino-acid-conservation"}},u=[{value:"Overview",id:"overview",children:[],level:2},{value:"Populations",id:"populations",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing",children:[{value:"Conflict Resolution",id:"conflict-resolution",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Structural Variants",id:"structural-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing-1",children:[],level:3},{value:"Converting VCF svTypes to SO sequence 
alterations",id:"converting-vcf-svtypes-to-so-sequence-alterations",children:[{value:"Exceptions",id:"exceptions",children:[],level:4}],level:3}],level:2},{value:"JSON Output",id:"json-output-1",children:[],level:2}],m={toc:u},d="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. ",(0,r.kt)("em",{parentName:"p"},"Nature 526"),", 75\u201381 (2015). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/nature15394"},"https://doi.org/10.1038/nature15394")))),(0,r.kt)("h2",{id:"populations"},"Populations"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"The super population membership can be found here: (",(0,r.kt)("a",{parentName:"li",href:"http://www.1000genomes.org/category/population/"},"http://www.1000genomes.org/category/population/"),")"),(0,r.kt)("li",{parentName:"ul"},"We want to capture the allele frequencies for all 26 populations as well as the 5 super populations and the total population.")),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing"},"VCF File Parsing"),(0,r.kt)("p",null,"The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. 
Our team converted the original data to VCF entries with allele counts and allele numbers like the following."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633\n")),(0,r.kt)("p",null,"The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored)."),(0,r.kt)("p",null,"We parse the VCF file and extract the following fields from INFO:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"AA"),(0,r.kt)("li",{parentName:"ul"},"AC"),(0,r.kt)("li",{parentName:"ul"},"AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AN"),(0,r.kt)("li",{parentName:"ul"},"AMR_AN"),(0,r.kt)("li",{parentName:"ul"},"AFR_AN"),(0,r.kt)("li",{parentName:"ul"},"EUR_AN"),(0,r.kt)("li",{parentName:"ul"},"SAS_AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AC"),(0,r.kt)("li",{parentName:"ul"},"AMR_AC"),(0,r.kt)("li",{parentName:"ul"},"AFR_AC"),(0,r.kt)("li",{parentName:"ul"},"EUR_AC"),(0,r.kt)("li",{parentName:"ul"},"SAS_AC")),(0,r.kt)("h4",{id:"conflict-resolution"},"Conflict Resolution"),(0,r.kt)("p",null,"We have observed conflicting allele frequency information in the source. Take the following example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;\n1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;\n")),(0,r.kt)("p",null,"That is, the variant 1-20505705-C-CTG has conflicting entries. 
To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Chromosome"),(0,r.kt)("th",{parentName:"tr",align:"left"},"#"," of alleles"),(0,r.kt)("th",{parentName:"tr",align:"center"},"#"," of conflicting alleles"),(0,r.kt)("th",{parentName:"tr",align:"left"},"percentage"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"chrX"),(0,r.kt)("td",{parentName:"tr",align:"left"},"834800"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2733"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.33%")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"Total"),(0,r.kt)("td",{parentName:"tr",align:"left"},"21413098"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2743"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.013%")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Currently"),", we removed the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep allele frequencies of all other alleles in the VCF line."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Potential Alternate Solutions")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Remove all alleles that are contained in the vcf lines which have conflicting allele. 
(Recommended by 1000 genome group Holly Zheng-Bradley, 7/29/2015)"),(0,r.kt)("li",{parentName:"ul"},"Recalculate the allele frequency for the conflicting allele."),(0,r.kt)("li",{parentName:"ul"},"Pick the allele frequency that has the highest data support.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/"},"GRCh37"),"\n",(0,r.kt)("a",{parentName:"p",href:"http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/"},"GRCh38")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSONSNV"}),(0,r.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing-1"},"VCF File Parsing"),(0,r.kt)("p",null,"The VCF files contain entries like the following:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A ,,, 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4\n")),(0,r.kt)("p",null,"Please note that, CNVs are allele-specific. 
For example, HG00096 is effectively copy number 4, which would be a net gain on chr22."),(0,r.kt)("p",null,"1000 Genomes contains 5 types of structural variants:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CNV"),(0,r.kt)("li",{parentName:"ul"},"DEL"),(0,r.kt)("li",{parentName:"ul"},"DUP"),(0,r.kt)("li",{parentName:"ul"},"INS"),(0,r.kt)("li",{parentName:"ul"},"INV")),(0,r.kt)("p",null,"Since data of 1000 genomes is provided in VCF format, we assume that the coordinates follow the vcf format, i.e., there is a padding base for symbolic alleles. So all the interval can be interpreted as ","[BEGIN+1, END]",".\nSimilarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Insertion issues")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"END = BEGIN for 6/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+2 for 93/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+3 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+4 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END \u2013 BEGIN range from 5 to 1156 for others.")),(0,r.kt)("h3",{id:"converting-vcf-svtypes-to-so-sequence-alterations"},"Converting VCF svTypes to SO sequence alterations"),(0,r.kt)("p",null,"The svType will be captured in our JSON file under the ",(0,r.kt)("a",{parentName:"p",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"sequenceAlteration")," key. 
Here's the translation we'll use according to svType in 1000 Genomes."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"svType"),(0,r.kt)("th",{parentName:"tr",align:null},"Alternative Alleles contain "),(0,r.kt)("th",{parentName:"tr",align:null},"sequenceAlteration"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"ALU"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DUP"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"CNV"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain (observed_gains >0 and observed_losses =0) ",(0,r.kt)("br",null),"copy_number_loss\xa0(observed_gains = 0 and observed_losses > 0) ",(0,r.kt)("br",null),"copy_number_variation 
(otherwise)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DEL"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_loss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"LINE1"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"SVA"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INV"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"inversion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INS"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"insertion")))),(0,r.kt)("h4",{id:"exceptions"},"Exceptions"),(0,r.kt)("p",null,(0,r.kt)("em",{parentName:"p"},"We discard structural variants without END")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n21 9495848 esv3646347 A 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0\n")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"CNVs in chrY")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"No other types of structural variants exist in chrY"),(0,r.kt)("li",{parentName:"ul"},'Since copy number is provided in genotype field, we directly parse the copy number from "CN" field.'),(0,r.kt)("li",{parentName:"ul"},"For most CNVs in chrY, the reference copy 
number is 1, but the refence number for CNVs in segmental duplication sites is 2 ("," in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG00105 HG00107 HG00108\nY 2888555 CNV_Y_2888555_3014661 T 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394\nY 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C , 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99\n")),(0,r.kt)("h2",{id:"json-output-1"},"JSON Output"),(0,r.kt)(o.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/2665e0c5.06944af0.js b/assets/js/2665e0c5.06944af0.js deleted file mode 100644 index ba238832..00000000 --- a/assets/js/2665e0c5.06944af0.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1562],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(t,e,n){return e in 
t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var s=a.createContext({}),p=function(t){var e=a.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=p(t.components);return a.createElement(s.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,s=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),c=p(n),u=r,f=c["".concat(s,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(f,o(o({ref:e},m),{},{components:n})):a.createElement(f,o({ref:e},m))}));function f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=u;var i={};for(var s in e)hasOwnProperty.call(e,s)&&(i[s]=e[s]);i.originalType=t,i[c]="string"==typeof t?t:r,o[1]=i;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>l,metadata:()=>i,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.18/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.18/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],p={toc:s},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated 
diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/268147df.266fb58d.js b/assets/js/268147df.266fb58d.js deleted file mode 100644 index 
c5a7e820..00000000 --- a/assets/js/268147df.266fb58d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3514],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>g});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),p=c(n),m=r,g=p["".concat(s,".").concat(m)]||p[m]||u[m]||i;return n?a.createElement(g,l(l({ref:t},d),{},{components:n})):a.createElement(g,l({ref:t},d))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,l=new Array(i);l[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[p]="string"==typeof e?e:r,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},l=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.17/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| 
Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],c={toc:s},d="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease 
label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted")))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/27ea7395.b24be961.js b/assets/js/27ea7395.b24be961.js deleted file mode 100644 index 20b9dc5b..00000000 --- a/assets/js/27ea7395.b24be961.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2783],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=c(e.components);return 
a.createElement(s.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),m=c(n),u=r,v=m["".concat(s,".").concat(u)]||m[u]||d[u]||i;return n?a.createElement(v,l(l({ref:t},p),{},{components:n})):a.createElement(v,l({ref:t},p))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,l=new Array(i);l[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:r,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={title:"Variant IDs"},l=void 0,o={unversionedId:"core-functionality/variant-ids",id:"version-3.17/core-functionality/variant-ids",title:"Variant IDs",description:"Overview",source:"@site/versioned_docs/version-3.17/core-functionality/variant-ids.md",sourceDirName:"core-functionality",slug:"/core-functionality/variant-ids",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/variant-ids",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/core-functionality/variant-ids.md",tags:[],version:"3.17",frontMatter:{title:"Variant IDs"},sidebar:"version-3.17/docs",previous:{title:"MNV Recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/mnv-recomposition"},next:{title:"Jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/utilities/jasix"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF Examples",id:"vcf-examples",children:[],level:3},{value:"Format",id:"format",children:[],level:3},{value:"VID 
Examples",id:"vid-examples",children:[],level:3}],level:2},{value:"Translocation Breakends",id:"translocation-breakends",children:[{value:"VCF Example",id:"vcf-example",children:[],level:3},{value:"Format",id:"format-1",children:[],level:3},{value:"VID Example",id:"vid-example",children:[],level:3}],level:2},{value:"All Other Structural Variants",id:"all-other-structural-variants",children:[{value:"VCF Examples",id:"vcf-examples-1",children:[],level:3},{value:"Format",id:"format-2",children:[],level:3},{value:"VID Examples",id:"vid-examples-1",children:[],level:3}],level:2}],c={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Many downstream tools use a variant identifier to store annotation results. We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute."),(0,r.kt)("p",null,"The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. 
Our VID scheme attempts to fill that gap."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Conventions")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("ul",{parentName:"div"},(0,r.kt)("li",{parentName:"ul"},"all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)"),(0,r.kt)("li",{parentName:"ul"},"for a reference variant (i.e. no alt allele), replace the period (.) with the reference base"),(0,r.kt)("li",{parentName:"ul"},"padding bases are used, neither the reference nor alternate allele can be empty"),(0,r.kt)("li",{parentName:"ul"},"some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base")))),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-examples"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 66507 . T A 184.45 PASS .\nchr1 66521 . T TATATA 144.53 PASS .\nchr1 66572 . 
GTA G,GTACTATATATTATA 45.45 PASS .\n")),(0,r.kt)("h3",{id:"format"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-examples"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-66507-T-A"),(0,r.kt)("li",{parentName:"ul"},"1-66521-T-TATATA"),(0,r.kt)("li",{parentName:"ul"},"1-66572-GTA-G"),(0,r.kt)("li",{parentName:"ul"},"1-66572-G-GTACTATATATTA")),(0,r.kt)("h2",{id:"translocation-breakends"},"Translocation Breakends"),(0,r.kt)("h3",{id:"vcf-example"},"VCF Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 2617277 . A AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[ . PASS SVTYPE=BND\n")),(0,r.kt)("h3",{id:"format-1"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-example"},"VID Example"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[")),(0,r.kt)("h2",{id:"all-other-structural-variants"},"All Other Structural Variants"),(0,r.kt)("h3",{id:"vcf-examples-1"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 1000 . G . PASS END=3001000;SVTYPE=ROH\nchr1 1350082 . G . PASS END=1351320;SVTYPE=DEL\nchr1 1477854 . C . PASS END=1477984;SVTYPE=DUP\nchr1 1477968 . T . PASS END=1477968;SVTYPE=INS\nchr1 1715898 . N . PASS SVTYPE=CNV;END=1750149\nchr1 2650426 . N . PASS SVTYPE=CNV;END=2653074\nchr2 321682 . T . PASS SVTYPE=INV;END=421681\nchr20 2633403 . G . 
PASS END=2633421\n")),(0,r.kt)("h3",{id:"format-2"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"end position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"SVTYPE")),(0,r.kt)("h3",{id:"vid-examples-1"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-1000-3001000-G-","<","ROH",">","-ROH"),(0,r.kt)("li",{parentName:"ul"},"1-1350082-1351320-G-","<","DEL",">","-DEL"),(0,r.kt)("li",{parentName:"ul"},"1-1477854-1477984-C-","<","DUP:TANDEM",">","-DUP"),(0,r.kt)("li",{parentName:"ul"},"1-1477968-1477968-T-","<","INS",">","-INS"),(0,r.kt)("li",{parentName:"ul"},"1-1715898-1750149-A-","<","DUP",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(replace the N with A)")),(0,r.kt)("li",{parentName:"ul"},"1-2650426-2653074-N-","<","DEL",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(keep the N)")),(0,r.kt)("li",{parentName:"ul"},"2-321682-421681-T-","<","INV",">","-INV"),(0,r.kt)("li",{parentName:"ul"},"20-2633403-2633421-G-","<","STR2",">","-STR")))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/28131b90.ffe89422.js b/assets/js/28131b90.ffe89422.js deleted file mode 100644 index 9e7c0514..00000000 --- a/assets/js/28131b90.ffe89422.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9491],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function a(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,o=e.mdxType,a=e.originalType,i=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=o,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||a;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var a=n.length,l=new Array(a);l[0]=d;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:o,l[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>a,metadata:()=>c,toc:()=>i});var r=n(87462),o=(n(67294),n(3905));const a={},l=void 0,c={unversionedId:"data-sources/phylop-json",id:"version-3.18/data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/phylop-json.md",tags:[],version:"3.18",frontMatter:{}},i=[],p={toc:i},s="wrapper";function 
u(e){let{components:t,...n}=e;return(0,o.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] \n')),(0,o.kt)("table",null,(0,o.kt)("thead",{parentName:"table"},(0,o.kt)("tr",{parentName:"thead"},(0,o.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,o.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,o.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,o.kt)("tbody",{parentName:"table"},(0,o.kt)("tr",{parentName:"tbody"},(0,o.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,o.kt)("td",{parentName:"tr",align:"center"},"float"),(0,o.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/284deb6b.a0b2a7a4.js b/assets/js/284deb6b.a0b2a7a4.js deleted file mode 100644 index 78fcb608..00000000 --- a/assets/js/284deb6b.a0b2a7a4.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8616,7185,6625,6446,6754],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>g});var a=n(67294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var p=a.createContext({}),m=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof 
e?e(t):i(i({},t),e)),n},s=function(e){var t=m(e.components);return a.createElement(p.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},N=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,p=e.parentName,s=o(e,["components","mdxType","originalType","parentName"]),u=m(n),N=l,g=u["".concat(p,".").concat(N)]||u[N]||d[N]||r;return n?a.createElement(g,i(i({ref:t},s),{},{components:n})):a.createElement(g,i({ref:t},s))}));function g(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,i=new Array(r);i[0]=N;var o={};for(var p in t)hasOwnProperty.call(t,p)&&(o[p]=t[p]);o.originalType=e,o[u]="string"==typeof e?e:l,i[1]=o;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.18/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad-lof-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n 
"loeuf":1.13e0\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction 
(LOEUF)")))))}u.isMDXComponent=!0},42182:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.18/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n 
"failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. 
Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}u.isMDXComponent=!0},78090:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-data_description",id:"version-3.18/data-sources/gnomad-structural-variants-data_description",title:"gnomad-structural-variants-data_description",description:"Bed Example",source:"@site/versioned_docs/version-3.18/data-sources/gnomad-structural-variants-data_description.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-data_description",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad-structural-variants-data_description",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad-structural-variants-data_description.md",tags:[],version:"3.18",frontMatter:{}},p=[{value:"Bed Example",id:"bed-example",children:[],level:4},{value:"TSV Example",id:"tsv-example",children:[],level:4},{value:"Structural Variant Type Mapping",id:"structural-variant-type-mapping",children:[],level:4}],m={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h4",{id:"bed-example"},"Bed Example"),(0,l.kt)("p",null,"The bed file was obtained from original source for 
GRCh37"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#chrom start end name svtype ALGORITHMS BOTHSIDES_SUPPORT CHR2 CPX_INTERVALS CPX_TYPE END2 ENDEVIDENCE HIGH_SR_BACKGROUND PCRPLUS_DEPLETED PESR_GT_OVERDISPERSION POS2 PROTEIN_CODING__COPY_GAIN PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC PROTEIN_CODING__INTRONIC PROTEIN_CODING__INV_SPAN PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER PROTEIN_CODING__UTR SOURCE STRANDS SVLEN SVTYPE UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN AC AF N_BI_GENOS N_HOMREF N_HET N_HOMALT FREQ_HOMREF FREQ_HET FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF MALE_N_HET MALE_N_HOMALT MALE_FREQ_HOMREF MALE_FREQ_HET MALE_FREQ_HOMALT MALE_N_HEMIREF MALE_N_HEMIALT MALE_FREQ_HEMIREF MALE_FREQ_HEMIALT PAR FEMALE_AN FEMALE_AC FEMALE_AF FEMALE_N_BI_GENOS FEMALE_N_HOMREF FEMALE_N_HET FEMALE_N_HOMALT FEMALE_FREQ_HOMREF FEMALE_FREQ_HET FEMALE_FREQ_HOMALT POPMAX_AF AFR_AN AFR_AC AFR_AF AFR_N_BI_GENOS AFR_N_HOMREF AFR_N_HET AFR_N_HOMALT AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF AFR_MALE_N_HET AFR_MALE_N_HOMALT AFR_MALE_FREQ_HOMREF AFR_MALE_FREQ_HET AFR_MALE_FREQ_HOMALT AFR_MALE_N_HEMIREF AFR_MALE_N_HEMIALT AFR_MALE_FREQ_HEMIREF AFR_MALE_FREQ_HEMIALT AFR_FEMALE_AN AFR_FEMALE_AC AFR_FEMALE_AF AFR_FEMALE_N_BI_GENOS AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT AMR_AN AMR_AC AMR_AF AMR_N_BI_GENOS AMR_N_HOMREF AMR_N_HET AMR_N_HOMALT AMR_FREQ_HOMREF AMR_FREQ_HET AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS AMR_MALE_N_HOMREF AMR_MALE_N_HET AMR_MALE_N_HOMALT AMR_MALE_FREQ_HOMREF AMR_MALE_FREQ_HET AMR_MALE_FREQ_HOMALT AMR_MALE_N_HEMIREF AMR_MALE_N_HEMIALT AMR_MALE_FREQ_HEMIREF AMR_MALE_FREQ_HEMIALT AMR_FEMALE_AN AMR_FEMALE_AC 
AMR_FEMALE_AF AMR_FEMALE_N_BI_GENOS AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT EAS_AN EAS_AC EAS_AF EAS_N_BI_GENOS EAS_N_HOMREF EAS_N_HET EAS_N_HOMALT EAS_FREQ_HOMREF EAS_FREQ_HET EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF EAS_MALE_N_HET EAS_MALE_N_HOMALT EAS_MALE_FREQ_HOMREF EAS_MALE_FREQ_HET EAS_MALE_FREQ_HOMALT EAS_MALE_N_HEMIREF EAS_MALE_N_HEMIALT EAS_MALE_FREQ_HEMIREF EAS_MALE_FREQ_HEMIALT EAS_FEMALE_AN EAS_FEMALE_AC EAS_FEMALE_AF EAS_FEMALE_N_BI_GENOS EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT EUR_AN EUR_AC EUR_AF EUR_N_BI_GENOS EUR_N_HOMREF EUR_N_HET EUR_N_HOMALT EUR_FREQ_HOMREF EUR_FREQ_HET EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF EUR_MALE_N_HET EUR_MALE_N_HOMALT EUR_MALE_FREQ_HOMREF EUR_MALE_FREQ_HET EUR_MALE_FREQ_HOMALT EUR_MALE_N_HEMIREF EUR_MALE_N_HEMIALT EUR_MALE_FREQ_HEMIREF EUR_MALE_FREQ_HEMIALT EUR_FEMALE_AN EUR_FEMALE_AC EUR_FEMALE_AF EUR_FEMALE_N_BI_GENOS EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT OTH_AN OTH_AC OTH_AF OTH_N_BI_GENOS OTH_N_HOMREF OTH_N_HET OTH_N_HOMALT OTH_FREQ_HOMREF OTH_FREQ_HET OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF OTH_MALE_N_HET OTH_MALE_N_HOMALT OTH_MALE_FREQ_HOMREF OTH_MALE_FREQ_HET OTH_MALE_FREQ_HOMALT OTH_MALE_N_HEMIREF OTH_MALE_N_HEMIALT OTH_MALE_FREQ_HEMIREF OTH_MALE_FREQ_HEMIALT OTH_FEMALE_AN OTH_FEMALE_AC OTH_FEMALE_AF OTH_FEMALE_N_BI_GENOS OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET OTH_FEMALE_N_HOMALT OTH_FEMALE_FREQ_HOMREF OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT FILTER\n1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False 
False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 0.017857100814580917 0.0UNRESOLVED \n")),(0,l.kt)("h4",{id:"tsv-example"},"TSV Example"),(0,l.kt)("p",null,"The tsv was obtained from lifted over dataset created by dbVar for 
GRCh38"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#variant_call_accession variant_call_id variant_call_type experiment_id sample_id sampleset_id assembly chrcontig outer_start start inner_start inner_stop stop outer_stop insertion_length variant_region_acc variant_region_id copy_number description validation zygosity origin phenotype hgvs_name placement_method placement_rank placements_per_assembly remap_alignment remap_best_within_cluster remap_coverage remap_diff_chr remap_failure_code allele_count allele_frequency allele_number\nnssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\n")),(0,l.kt)("h4",{id:"structural-variant-type-mapping"},"Structural Variant Type Mapping"),(0,l.kt)("p",null,"The source files represented the structural variants with keys using various naming conventions.\nIn the Nirvana JSON output, these keys will be mapped according to the following. 
"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Nirvana JSON SV Type Key"),(0,l.kt)("th",{parentName:"tr",align:null},"GRCh37 Source SV Type Key"),(0,l.kt)("th",{parentName:"tr",align:null},"GRCh38 Source SV Type Key"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"copy_number_variation"),(0,l.kt)("td",{parentName:"tr",align:null}),(0,l.kt)("td",{parentName:"tr",align:null},"copy number variation")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"deletion"),(0,l.kt)("td",{parentName:"tr",align:null},"DEL, CN=0"),(0,l.kt)("td",{parentName:"tr",align:null},"deletion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"duplication"),(0,l.kt)("td",{parentName:"tr",align:null},"DUP"),(0,l.kt)("td",{parentName:"tr",align:null},"duplication")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS"),(0,l.kt)("td",{parentName:"tr",align:null},"insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"inversion"),(0,l.kt)("td",{parentName:"tr",align:null},"INV"),(0,l.kt)("td",{parentName:"tr",align:null},"inversion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME"),(0,l.kt)("td",{parentName:"tr",align:null},"mobile element insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:ALU"),(0,l.kt)("td",{parentName:"tr",align:null},"alu 
insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:LINE1"),(0,l.kt)("td",{parentName:"tr",align:null},"line1 insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:SVA"),(0,l.kt)("td",{parentName:"tr",align:null},"sva insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"structural alteration"),(0,l.kt)("td",{parentName:"tr",align:null}),(0,l.kt)("td",{parentName:"tr",align:null},"sequence alteration")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"complex_structural_alteration"),(0,l.kt)("td",{parentName:"tr",align:null},"CPX"),(0,l.kt)("td",{parentName:"tr",align:null})))))}u.isMDXComponent=!0},40853:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-json",id:"version-3.18/data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad-structural-variants-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": 
[\n {\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n "variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n "afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n }\n]\n\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"chromosome number")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"begin"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"end"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"variantType"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"structural variant 
type")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"variantId"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"gnomAD ID")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"boolean"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. 
Range: 0 - 1.0")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,l.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}u.isMDXComponent=!0},67246:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>c,frontMatter:()=>m,metadata:()=>u,toc:()=>d});var a=n(87462),l=(n(67294),n(3905)),r=n(42182),i=n(77953),o=n(40853),p=n(78090);const 
m={title:"gnomAD"},s=void 0,u={unversionedId:"data-sources/gnomad",id:"version-3.18/data-sources/gnomad",title:"gnomAD",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/gnomad.mdx",sourceDirName:"data-sources",slug:"/data-sources/gnomad",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad.mdx",tags:[],version:"3.18",frontMatter:{title:"gnomAD"},sidebar:"docs",previous:{title:"GME Variome",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gme"},next:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mito-heteroplasmy"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF extraction",id:"vcf-extraction",children:[],level:3},{value:"Computation",id:"computation",children:[],level:3},{value:"Merging genomes and exomes",id:"merging-genomes-and-exomes",children:[],level:3},{value:"Filters",id:"filters",children:[],level:3},{value:"VCF download instructions",id:"vcf-download-instructions",children:[],level:3},{value:"JSON output",id:"json-output",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[{value:"Source data files",id:"source-data-files",children:[],level:4}],level:3}],level:2},{value:"LoF Gene Metrics",id:"lof-gene-metrics",children:[{value:"Tab delimited file example",id:"tab-delimited-file-example",children:[],level:3},{value:"JSON key to TSV column mapping",id:"json-key-to-tsv-column-mapping",children:[],level:3},{value:"Gene symbol update",id:"gene-symbol-update",children:[],level:3},{value:"Conflict resolution",id:"conflict-resolution",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON 
output",id:"json-output-1",children:[],level:3}],level:2},{value:"Structural Variants",id:"structural-variants",children:[{value:"Source Files",id:"source-files",children:[],level:3},{value:"Download URLs",id:"download-urls",children:[{value:"GRCh37",id:"grch37",children:[],level:4},{value:"GRCh38",id:"grch38",children:[],level:4},{value:"Download URL",id:"download-url-1",children:[],level:4}],level:3},{value:"JSON output",id:"json-output-2",children:[],level:3}],level:2}],N={toc:d},g="wrapper";function c(e){let{components:t,...n}=e;return(0,l.kt)(g,(0,a.Z)({},N,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"The Genome Aggregation Database (",(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/"},"gnomAD"),") is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Koch, L., 2020. Exploring human genomic diversity with gnomAD. 
",(0,l.kt)("em",{parentName:"p"},"Nature Reviews Genetics"),", ",(0,l.kt)("strong",{parentName:"p"},"21(8)"),", pp.448-448."))),(0,l.kt)("h2",{id:"small-variants"},"Small Variants"),(0,l.kt)("h3",{id:"vcf-extraction"},"VCF extraction"),(0,l.kt)("p",null,"We currently extract the following info fields from gnomAD genome and exome VCF files:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("p",null,"We also extract the following extra fields from gnomAD exome VCF file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("h3",{id:"computation"},"Computation"),(0,l.kt)("p",null,"Using these, we compute the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Coverage"),(0,l.kt)("li",{parentName:"ul"},"Allele count, Homozygous count, allele number and allele frequencies for:"),(0,l.kt)("li",{parentName:"ul"},"Global population"),(0,l.kt)("li",{parentName:"ul"},"African/African Americans"),(0,l.kt)("li",{parentName:"ul"},"Admixed Americans"),(0,l.kt)("li",{parentName:"ul"},"Ashkenazi Jews"),(0,l.kt)("li",{parentName:"ul"},"East Asians"),(0,l.kt)("li",{parentName:"ul"},"Finnish"),(0,l.kt)("li",{parentName:"ul"},"Non-Finnish Europeans"),(0,l.kt)("li",{parentName:"ul"},"South Asian"),(0,l.kt)("li",{parentName:"ul"},"Others (population not assigned)"),(0,l.kt)("li",{parentName:"ul"},"Male"),(0,l.kt)("li",{parentName:"ul"},"Female"),(0,l.kt)("li",{parentName:"ul"},"Controls")),(0,l.kt)("div",{className:"admonition admonition-tip alert 
alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Coverage = DP / AN. Frequencies are computed using AC/AN for each population."),(0,l.kt)("li",{parentName:"ul"},"Please note that currently there is no genome sequencing data of south asian (SAS) population available in gnomAD."),(0,l.kt)("li",{parentName:"ul"},"Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.")))),(0,l.kt)("h3",{id:"merging-genomes-and-exomes"},"Merging genomes and exomes"),(0,l.kt)("p",null,"When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 
5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"For GRCh37, Nirvana currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output."),(0,l.kt)("li",{parentName:"ul"},"For GRCh38, Nirvana currently uses gnomAD version 3.0 which doesn't contain the exomes data. Therefore, only genomes data are presented in the output.")))),(0,l.kt)("h3",{id:"filters"},"Filters"),(0,l.kt)("p",null,"The following strategy will be used when there's a conflict in filter status:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"center"}),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes PASS")),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes Filtered")))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes PASS")),(0,l.kt)("td",{parentName:"tr",align:"center"},"PASS"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use exome data")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes Filtered")),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use genome data"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Filtered")))),(0,l.kt)("h3",{id:"vcf-download-instructions"},"VCF download instructions"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/downloads"},"https://gnomad.broadinstitute.org/downloads")),(0,l.kt)("h3",{id:"json-output"},"JSON 
output"),(0,l.kt)(r.default,{mdxType:"JSONV"}),(0,l.kt)("h3",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,l.kt)("p",null,"The gnomAD ",(0,l.kt)("inlineCode",{parentName:"p"},".nsa")," for Nirvana can be built using the ",(0,l.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,l.kt)("inlineCode",{parentName:"p"},"gnomad")," subcommand. We will describe building gnomAD version 3.1 here."),(0,l.kt)("h4",{id:"source-data-files"},"Source data files"),(0,l.kt)("p",null,"Input VCF files (one per chromosome) and a ",(0,l.kt)("inlineCode",{parentName:"p"},".version")," file are required in a folder to build the ",(0,l.kt)("inlineCode",{parentName:"p"},".nsa")," file. For example, my directory contains:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr10.vcf.bgz chr22.vcf.bgz\nchr11.vcf.bgz chr2.vcf.bgz\nchr12.vcf.bgz chr3.vcf.bgz\nchr13.vcf.bgz chr4.vcf.bgz\nchr14.vcf.bgz chr5.vcf.bgz\nchr15.vcf.bgz chr6.vcf.bgz\nchr16.vcf.bgz chr7.vcf.bgz\nchr17.vcf.bgz chr8.vcf.bgz\nchr18.vcf.bgz chr9.vcf.bgz\nchr19.vcf.bgz chrM.vcf.bgz\nchr1.vcf.bgz chrX.vcf.bgz\nchr20.vcf.bgz chrY.vcf.bgz\nchr21.vcf.bgz gnomad.r3.1.version\n")),(0,l.kt)("p",null,"The version file is a text file with the following content."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=gnomAD\nVERSION=3.1\nDATE=2020-10-29\nDESCRIPTION=Allele frequencies from Genome Aggregation Database (gnomAD)\n")),(0,l.kt)("p",null,"The help menu for the utility is as follows:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"SAUtils.dll gnomad\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll gnomad [options]\nReads provided supplementary data files and populates 
tsv files\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --genome, -g input directory containing VCF (and .version)\n files with genomic frequencies\n --exome, -e input directory containing VCF (and .version)\n files with exomic frequencies\n --temp, -t output temp directory for intermediate (per chrom)\n NSA files\n --out, -o output directory for NSA file\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,l.kt)("p",null,"Here is a sample execution:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet ~/Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll Gnomad \\\\\n--ref ~/References/7/Homo_sapiens.GRCh38.Nirvana.dat --genome genomes/ \\\\\n--out ~/SupplementaryDatabase/63/GRCh38 --temp ~/ExternalDataSources/gnomAD/3.1/GRCh38/temp\n")),(0,l.kt)("h2",{id:"lof-gene-metrics"},"LoF Gene Metrics"),(0,l.kt)("h3",{id:"tab-delimited-file-example"},"Tab delimited file example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_zmis_z lof_z oe_lof_upper_rank oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfep_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof exac_exp_lof exac_oe_lof brain_expression chromosome start_positionend_position\nMED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 
1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643\n")),(0,l.kt)("h3",{id:"json-key-to-tsv-column-mapping"},"JSON key to TSV column mapping"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"JSON key"),(0,l.kt)("th",{parentName:"tr",align:null},"TSV column"),(0,l.kt)("th",{parentName:"tr",align:null},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"pLI"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 
0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"syn_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"mis_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"oe_lof_upper"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))),(0,l.kt)("h3",{id:"gene-symbol-update"},"Gene symbol update"),(0,l.kt)("p",null,"The input file provides Ensembl gene ids for each entry. We observed that they were unique while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene Ids are more stable, and Nirvana transcript cache data contains Ensembl gene ids, we use these ids to extract the gene symbols from the transcript cache. For example, if ENSG0001 has gene symbol GENE1 in the input but Nirvana cache say ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry."),(0,l.kt)("h3",{id:"conflict-resolution"},"Conflict resolution"),(0,l.kt)("p",null,"gnomAD uses Ensembl GeneID as unique identifiers in the ",(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"source file")," but Nirvana uses HGNC gene symbols. 
Multiple Ensembl GeneIDs can map to the same HGNC symbol and therefore may result is conflict."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"MDGA2 ENST00000426342 306 4.0043e+02 7.6419e-01 2.1096e-05 4724 78 1.6525e+02 4.7202e-01 1923 125 1.3737e+02 9.0993e-01 7.1973e-06 1413 4 2.0926e-06 453 3.8316e+01 9.9922e-01 8.6490e-12 7.8128e-04 1.0440e-01 7.8600e-01 1.0560e+00 6.9500e-01 8.4000e-01 5.0000e-02 2.3900e-01 8.2988e-01 1.6769e+00 5.1372e+00 1529 0 0 7 2.8103e-05 4.0317e-06 124784 7 0 124791 2.8047e-05 9.8167e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5391e-05 1.6672e-04 3.2680e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5308e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000139915 2 2181 13 protein_coding 835332 9.9322e-01 3 2.7833e+01 1.0779e-01 NA 14 47308826 48144157\nMDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 47311134 48143999\n")),(0,l.kt)("p",null,'In such cases, Nirvana chooses the entry with the smallest "LOEUF" value. 
The reason for choosing this value can be highlighted by the following table:'),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"right"},"LOEUF decile"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Haplo-insufficient"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Dominant"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Recessive"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Olfactory Genes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"0-10%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"104"),(0,l.kt)("td",{parentName:"tr",align:"right"},"140"),(0,l.kt)("td",{parentName:"tr",align:"right"},"36"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"10-20%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"47"),(0,l.kt)("td",{parentName:"tr",align:"right"},"128"),(0,l.kt)("td",{parentName:"tr",align:"right"},"72"),(0,l.kt)("td",{parentName:"tr",align:"right"},"1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"20-30%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"17"),(0,l.kt)("td",{parentName:"tr",align:"right"},"86"),(0,l.kt)("td",{parentName:"tr",align:"right"},"112"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"30-40%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"8"),(0,l.kt)("td",{parentName:"tr",align:"right"},"80"),(0,l.kt)("td",{parentName:"tr",align:"right"},"173"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"40-50%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"7"),(0,l.kt)("td",{parentName:"tr",align:"right"},"65"),(0,l.kt)("td",{parentName:"tr",align:"right"},"206"),(0,l.kt
)("td",{parentName:"tr",align:"right"},"8")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"50-60%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4"),(0,l.kt)("td",{parentName:"tr",align:"right"},"54"),(0,l.kt)("td",{parentName:"tr",align:"right"},"207"),(0,l.kt)("td",{parentName:"tr",align:"right"},"6")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"60-70%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"46"),(0,l.kt)("td",{parentName:"tr",align:"right"},"154"),(0,l.kt)("td",{parentName:"tr",align:"right"},"18")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"70-80%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"2"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49"),(0,l.kt)("td",{parentName:"tr",align:"right"},"120"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"80-90%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"34"),(0,l.kt)("td",{parentName:"tr",align:"right"},"58"),(0,l.kt)("td",{parentName:"tr",align:"right"},"96")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"90-100%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"26"),(0,l.kt)("td",{parentName:"tr",align:"right"},"40"),(0,l.kt)("td",{parentName:"tr",align:"right"},"174")))),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 
5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Table source: ",(0,l.kt)("a",{parentName:"li",href:"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf"},"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf")),(0,l.kt)("li",{parentName:"ul"},"This table indicates that lower LOEUF scores have more deleterious effect on genes."),(0,l.kt)("li",{parentName:"ul"},"Only 15 out of 19685 genes have conflicting entries.")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"List of genes with conflicting entries")),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'MDGA2:\n {"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}\n {"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}\nCRYBG3:\n {"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}\n {"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}\nCHTF8:\n {"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}\n {"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}\nSEPT1:\n {"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}\n {"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}\nARL14EPL:\n {"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}\n {"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}\nUGT2A1:\n {"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}\n 
{"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}\nLTB4R2:\n {"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}\n {"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}\nCDRT1:\n {"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}\n {"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}\nMUC3A:\n {"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}\n {"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}\nCOG8:\n {"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}\n {"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}\nAC006486.1:\n {"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}\n {"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}\nAL645922.1:\n {"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}\n {"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}\nNBPF20:\n {"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}\n {"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}\nPRAMEF11:\n {"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}\n {"synZ":-3.33e0,"misZ":-2.59e0}\nFAM231D:\n {"synZ":-1.98e0,"misZ":-1.44e0}\n {"synZ":1.07e0,"misZ":3.13e-1}\n')),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Conflict resolution")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Pick the entry with the lowest LOEUF score"),(0,l.kt)("li",{parentName:"ul"},"If the same, pick the lowest pLI"),(0,l.kt)("li",{parentName:"ul"},"Otherwise pick 
the entry with the max absolute value of synZ + misZ")),(0,l.kt)("h3",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz")),(0,l.kt)("h3",{id:"json-output-1"},"JSON output"),(0,l.kt)(i.default,{mdxType:"JSONG"}),(0,l.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Collins, R.L., Brand, H., Karczewski, K.J. et al. 2020. A structural variation reference for medical and population genetics. ",(0,l.kt)("em",{parentName:"p"},"Nature")," ",(0,l.kt)("strong",{parentName:"p"},"581"),", pp.444\u2013451. 
",(0,l.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/s41586-020-2287-8"},"https://doi.org/10.1038/s41586-020-2287-8")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Note"),"\nThe gnomAD structural variant annotations are in a preview stage at the moment.\nCurrently, the annotations do not include translocation breakends.\nFuture updates will include a better way of annotating the structural variants."),(0,l.kt)("h3",{id:"source-files"},"Source Files"),(0,l.kt)(p.default,{mdxType:"SVDATADESCRIPTION"}),(0,l.kt)("h3",{id:"download-urls"},"Download URLs"),(0,l.kt)("h4",{id:"grch37"},"GRCh37"),(0,l.kt)("p",null,"The GRCh37 file was downloaded from the original source. Following table gives some essential data metrics:"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.bed.gz"},"https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.bed.gz")),(0,l.kt)("h4",{id:"grch38"},"GRCh38"),(0,l.kt)("p",null,"Note: The data was unavailable from gnomAD 2.1 original source, however the lifted over structural variant dataset was created by dbVar and was obtained from them ",(0,l.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/sites/dbvarapp/studies/nstd166/"},"https://www.ncbi.nlm.nih.gov/sites/dbvarapp/studies/nstd166/"),"."),(0,l.kt)("h4",{id:"download-url-1"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/tsv/nstd166.GRCh38.variant_call.tsv.gz"},"https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/tsv/nstd166.GRCh38.variant_call.tsv.gz")),(0,l.kt)("h3",{id:"json-output-2"},"JSON output"),(0,l.kt)(o.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/2973af85.b08e0226.js b/assets/js/2973af85.b08e0226.js new file mode 100644 index 00000000..94fe277e --- /dev/null +++ 
b/assets/js/2973af85.b08e0226.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5111],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>b});var n=a(7294);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function l(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),c=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},d=function(e){var t=c(e.components);return n.createElement(s.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},p=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),u=c(a),p=r,b=u["".concat(s,".").concat(p)]||u[p]||m[p]||i;return a?n.createElement(b,l(l({ref:t},d),{},{components:a})):n.createElement(b,l({ref:t},d))}));function b(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=a.length,l=new Array(i);l[0]=p;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[u]="string"==typeof e?e:r,l[1]=o;for(var c=2;c{a.r(t),a.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var n=a(7462),r=(a(7294),a(3905));const i={title:"SAUtils"},l=void 
0,o={unversionedId:"utilities/sautils",id:"utilities/sautils",title:"SAUtils",description:"Overview",source:"@site/docs/utilities/sautils.mdx",sourceDirName:"utilities",slug:"/utilities/sautils",permalink:"/IlluminaConnectedAnnotationsDocumentation/utilities/sautils",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/utilities/sautils.mdx",tags:[],version:"current",frontMatter:{title:"SAUtils"},sidebar:"docs",previous:{title:"Jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/utilities/jasix"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"The SAUtils Menu",id:"the-sautils-menu",children:[],level:2},{value:"Output File Formats",id:"output-file-formats",children:[],level:2}],c={toc:s},d="wrapper";function u(e){let{components:t,...a}=e;return(0,r.kt)(d,(0,n.Z)({},c,a,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"SAUtils is a utility tool that creates binary supplementary annotation files (",(0,r.kt)("em",{parentName:"p"},".nsa, "),".gsa, ",(0,r.kt)("em",{parentName:"p"},".npd, "),".nsi, etc.) from original data files (e.g. VCFs, TSVs, XML, HTML, etc.) for various data sources (e.g. ClinVar, dbSNP, gnomAD, etc.). These binary files can be fed into the Illumina Connected Annotations Annotation engine to provide supplementary annotations in the output."),(0,r.kt)("h2",{id:"the-sautils-menu"},"The SAUtils Menu"),(0,r.kt)("p",null,"SAUtils supports building binary files for many data sources. 
The help menu lists them out in the form of sub-commands."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nUtilities focused on supplementary annotation\n\nUSAGE: dotnet SAUtils.dll [options]\n\nCOMMAND: AutoDownloadGenerate auto download and generate Omim, Clinvar, Clingen\n AaCon create AA conservation database\n ancestralAllele create Ancestral allele database from 1000Genomes data\n ClinGen create ClinGen database\n Downloader download ClinGen database\n clinvar create ClinVar database\n concat merge multiple NSA files for the same data source having non-overlapping regions\n Cosmic create COSMIC database\n CosmicSv create COSMIC SV database\n CosmicFusion create COSMIC gene fusion database\n CosmicCGC create COSMIC cancer gene census database\n CustomGene create custom gene annotation database\n CustomVar create custom variant annotation database\n Dann create DANN database\n Dbsnp create dbSNP database\n Dgv create DGV database\n DiseaseValidity create disease validity database\n DosageMapRegions create dosage map regions\n DosageSensitivity create dosage sensitivity database\n DownloadOmim download OMIM database\n ExtractMiniSA extracts mini SA\n ExtractMiniXml extracts mini XML (ClinVar)\n FilterSpliceNetTsv filter SpliceNet predictions\n FusionCatcher create FusionCatcher database\n Gerp create GERP conservation database\n GlobalMinor create global minor allele database\n Gnomad create gnomAD database\n Gnomad-lcr create gnomAD low complexity region database\n GnomadGeneScores create gnomAD gene scores database\n GnomadSV create gnomAD structural variant database\n Index edit an index file\n MitoHet create mitochondrial Heteroplasmy database\n MitomapSvDb 
create MITOMAP structural variants database\n MitomapVarDb create MITOMAP small variants database\n Omim create OMIM database\n OneKGen create 1000 Genome small variants database\n OneKGenSv create 1000 Genomes structural variants database\n OneKGenSvVcfToBed convert 1000 Genomes structural variants VCF file into a BED-like file\n PhyloP create PhyloP database\n PrimateAi create PrimateAI database\n RefMinor create Reference Minor database from 1000 Genome \n RemapWithDbsnp remap a VCF file given source and destination rsID mappings\n Revel create REVEL database\n SpliceAi create SpliceAI database\n TopMed create TOPMed database\n Gme create GME Variome database\n Decipher create Decipher database\n")),(0,r.kt)("p",null,"You can get further detailed help for each sub-command by typing in the subcommand. For example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll clinvar\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll clinvar [options]\nCreates a supplementary database with ClinVar annotations\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --rcv, -i ClinVar Full release XML file\n --vcv, -c ClinVar Variation release XML file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,r.kt)("p",null,"More detailed instructions about each sub-command can be found in documentation of respective data sources."),(0,r.kt)("h2",{id:"output-file-formats"},"Output File Formats"),(0,r.kt)("p",null,"The format of the binary file SAUtils produce depend on the type of annotation data represented in that file (e.g. small variant vs. structural variants vs. 
genes)."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"File Extension"),(0,r.kt)("th",{parentName:"tr",align:null},"Description"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nsa"),(0,r.kt)("td",{parentName:"tr",align:null},"Small variant annotations (e.g. SNV, insertions, deletions, etc.)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gsa"),(0,r.kt)("td",{parentName:"tr",align:null},"Compact variant annotations (e.g. SNV, insertions, deletions, etc.)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".idx"),(0,r.kt)("td",{parentName:"tr",align:null},"Index file")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nsi"),(0,r.kt)("td",{parentName:"tr",align:null},"Interval annotations (e.g. SV, CNVs, intervals)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nga"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene annotations")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".npd"),(0,r.kt)("td",{parentName:"tr",align:null},"Conservation scores")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".rma"),(0,r.kt)("td",{parentName:"tr",align:null},"Reference Minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gfs"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene fusions source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gfj"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene fusions JSON")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".schema"),(0,r.kt)("td",{parentName:"tr",align:null},"JSON schema")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/2973af85.f4e26546.js 
b/assets/js/2973af85.f4e26546.js deleted file mode 100644 index 2266b738..00000000 --- a/assets/js/2973af85.f4e26546.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5111],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>b});var n=a(67294);function r(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function i(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function l(e){for(var t=1;t=0||(r[a]=e[a]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(r[a]=e[a])}return r}var s=n.createContext({}),c=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):l(l({},t),e)),a},d=function(e){var t=c(e.components);return n.createElement(s.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},p=n.forwardRef((function(e,t){var a=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),u=c(a),p=r,b=u["".concat(s,".").concat(p)]||u[p]||m[p]||i;return a?n.createElement(b,l(l({ref:t},d),{},{components:a})):n.createElement(b,l({ref:t},d))}));function b(e,t){var a=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=a.length,l=new Array(i);l[0]=p;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[u]="string"==typeof e?e:r,l[1]=o;for(var c=2;c{a.r(t),a.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var n=a(87462),r=(a(67294),a(3905));const i={title:"SAUtils"},l=void 
0,o={unversionedId:"utilities/sautils",id:"utilities/sautils",title:"SAUtils",description:"Overview",source:"@site/docs/utilities/sautils.mdx",sourceDirName:"utilities",slug:"/utilities/sautils",permalink:"/IlluminaConnectedAnnotationsDocumentation/utilities/sautils",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/utilities/sautils.mdx",tags:[],version:"current",frontMatter:{title:"SAUtils"},sidebar:"docs",previous:{title:"Jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/utilities/jasix"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"The SAUtils Menu",id:"the-sautils-menu",children:[],level:2},{value:"Output File Formats",id:"output-file-formats",children:[],level:2}],c={toc:s},d="wrapper";function u(e){let{components:t,...a}=e;return(0,r.kt)(d,(0,n.Z)({},c,a,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"SAUtils is a utility tool that creates binary supplementary annotation files (",(0,r.kt)("em",{parentName:"p"},".nsa, "),".gsa, ",(0,r.kt)("em",{parentName:"p"},".npd, "),".nsi, etc.) from original data files (e.g. VCFs, TSVs, XML, HTML, etc.) for various data sources (e.g. ClinVar, dbSNP, gnomAD, etc.). These binary files can be fed into the Illumina Connected Annotations Annotation engine to provide supplementary annotations in the output."),(0,r.kt)("h2",{id:"the-sautils-menu"},"The SAUtils Menu"),(0,r.kt)("p",null,"SAUtils supports building binary files for many data sources. 
The help menu lists them out in the form of sub-commands."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nUtilities focused on supplementary annotation\n\nUSAGE: dotnet SAUtils.dll [options]\n\nCOMMAND: AutoDownloadGenerate auto download and generate Omim, Clinvar, Clingen\n AaCon create AA conservation database\n ancestralAllele create Ancestral allele database from 1000Genomes data\n ClinGen create ClinGen database\n Downloader download ClinGen database\n clinvar create ClinVar database\n concat merge multiple NSA files for the same data source having non-overlapping regions\n Cosmic create COSMIC database\n CosmicSv create COSMIC SV database\n CosmicFusion create COSMIC gene fusion database\n CosmicCGC create COSMIC cancer gene census database\n CustomGene create custom gene annotation database\n CustomVar create custom variant annotation database\n Dann create DANN database\n Dbsnp create dbSNP database\n Dgv create DGV database\n DiseaseValidity create disease validity database\n DosageMapRegions create dosage map regions\n DosageSensitivity create dosage sensitivity database\n DownloadOmim download OMIM database\n ExtractMiniSA extracts mini SA\n ExtractMiniXml extracts mini XML (ClinVar)\n FilterSpliceNetTsv filter SpliceNet predictions\n FusionCatcher create FusionCatcher database\n Gerp create GERP conservation database\n GlobalMinor create global minor allele database\n Gnomad create gnomAD database\n Gnomad-lcr create gnomAD low complexity region database\n GnomadGeneScores create gnomAD gene scores database\n GnomadSV create gnomAD structural variant database\n Index edit an index file\n MitoHet create mitochondrial Heteroplasmy database\n MitomapSvDb 
create MITOMAP structural variants database\n MitomapVarDb create MITOMAP small variants database\n Omim create OMIM database\n OneKGen create 1000 Genome small variants database\n OneKGenSv create 1000 Genomes structural variants database\n OneKGenSvVcfToBed convert 1000 Genomes structural variants VCF file into a BED-like file\n PhyloP create PhyloP database\n PrimateAi create PrimateAI database\n RefMinor create Reference Minor database from 1000 Genome \n RemapWithDbsnp remap a VCF file given source and destination rsID mappings\n Revel create REVEL database\n SpliceAi create SpliceAI database\n TopMed create TOPMed database\n Gme create GME Variome database\n Decipher create Decipher database\n")),(0,r.kt)("p",null,"You can get further detailed help for each sub-command by typing in the subcommand. For example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll clinvar\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll clinvar [options]\nCreates a supplementary database with ClinVar annotations\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --rcv, -i ClinVar Full release XML file\n --vcv, -c ClinVar Variation release XML file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,r.kt)("p",null,"More detailed instructions about each sub-command can be found in documentation of respective data sources."),(0,r.kt)("h2",{id:"output-file-formats"},"Output File Formats"),(0,r.kt)("p",null,"The format of the binary file SAUtils produce depend on the type of annotation data represented in that file (e.g. small variant vs. structural variants vs. 
genes)."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"File Extension"),(0,r.kt)("th",{parentName:"tr",align:null},"Description"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nsa"),(0,r.kt)("td",{parentName:"tr",align:null},"Small variant annotations (e.g. SNV, insertions, deletions, etc.)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gsa"),(0,r.kt)("td",{parentName:"tr",align:null},"Compact variant annotations (e.g. SNV, insertions, deletions, etc.)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".idx"),(0,r.kt)("td",{parentName:"tr",align:null},"Index file")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nsi"),(0,r.kt)("td",{parentName:"tr",align:null},"Interval annotations (e.g. SV, CNVs, intervals)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".nga"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene annotations")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".npd"),(0,r.kt)("td",{parentName:"tr",align:null},"Conservation scores")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".rma"),(0,r.kt)("td",{parentName:"tr",align:null},"Reference Minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gfs"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene fusions source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".gfj"),(0,r.kt)("td",{parentName:"tr",align:null},"Gene fusions JSON")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},".schema"),(0,r.kt)("td",{parentName:"tr",align:null},"JSON schema")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/29d3e7b6.4b8a29c4.js 
b/assets/js/29d3e7b6.4b8a29c4.js deleted file mode 100644 index 4a11ba18..00000000 --- a/assets/js/29d3e7b6.4b8a29c4.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6729],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},c=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),s=u(n),c=r,g=s["".concat(p,".").concat(c)]||s[c]||d[c]||l;return n?a.createElement(g,o(o({ref:e},m),{},{components:n})):a.createElement(g,o({ref:e},m))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 
0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.18/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],u={toc:p},m="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/2a4c539d.2e72985c.js b/assets/js/2a4c539d.2e72985c.js deleted file mode 100644 index f43713e9..00000000 --- a/assets/js/2a4c539d.2e72985c.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1559,1946],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),m=d(n),u=l,v=m["".concat(s,".").concat(u)]||m[u]||p[u]||r;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,i=new Array(r);i[0]=u;var o={};for(var s in 
t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:l,i[1]=o;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.14/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/dbsnp-json.md",tags:[],version:"3.14",frontMatter:{}},s=[],d={toc:s},c="wrapper";function m(e){let{components:t,...n}=e;return(0,l.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,l.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,l.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,l.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}m.isMDXComponent=!0},96793:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),l=(n(67294),n(3905)),r=n(66916);const i={title:"dbSNP"},o=void 
0,s={unversionedId:"data-sources/dbsnp",id:"version-3.14/data-sources/dbsnp",title:"dbSNP",description:"Overview",source:"@site/versioned_docs/version-3.14/data-sources/dbsnp.mdx",sourceDirName:"data-sources",slug:"/data-sources/dbsnp",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/dbsnp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/dbsnp.mdx",tags:[],version:"3.14",frontMatter:{title:"dbSNP"},sidebar:"version-3.14/docs",previous:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/clinvar"},next:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/gnomad"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Global allele extraction",id:"global-allele-extraction",children:[],level:4},{value:"Equal Allele Frequency Example (2 alleles)",id:"equal-allele-frequency-example-2-alleles",children:[],level:4},{value:"Equal Allele Frequency Example (3 alleles)",id:"equal-allele-frequency-example-3-alleles",children:[],level:4},{value:"Equal Allele Frequency in Alternate Alleles",id:"equal-allele-frequency-in-alternate-alleles",children:[],level:4},{value:"Equal Allele Frequency Between Reference & Alternate Allele",id:"equal-allele-frequency-between-reference--alternate-allele",children:[],level:4}],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},m="wrapper";function p(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"dbSNP contains human single nucleotide variations, microsatellites, 
and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP\u2014Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. ",(0,l.kt)("em",{parentName:"p"},"Genome Res."),", ",(0,l.kt)("strong",{parentName:"p"},"9"),", 677\u2013679."))),(0,l.kt)("h2",{id:"vcf-file"},"VCF File"),(0,l.kt)("h3",{id:"example"},"Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 10177 rs367896724 A AC . . 
RS=367896724;RSPOS=10177;dbSNPBuildID=138; \\ \n SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \\\n VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \\\n TOPMED=0.76728147298674821,0.23271852701325178\n")),(0,l.kt)("h3",{id:"parsing"},"Parsing"),(0,l.kt)("p",null,"From the VCF file, we're mainly interested in the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"rsID")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"ID")," field"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"CAF")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"INFO")," field")),(0,l.kt)("h4",{id:"global-allele-extraction"},"Global allele extraction"),(0,l.kt)("p",null,"The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values). 
"),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: Global Major Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele."))),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: 
Global Minor Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily."))),(0,l.kt)("h4",{id:"equal-allele-frequency-example-2-alleles"},"Equal Allele Frequency Example (2 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C CAF=0.5,0.5\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and C to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-example-3-alleles"},"Equal Allele Frequency Example (3 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.33,0.33,0.33\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-in-alternate-alleles"},"Equal Allele Frequency in Alternate Alleles"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.4,0.4\n")),(0,l.kt)("p",null,"We will select C or T to be arbitrarily assigned to be the global major or global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-between-reference--alternate-allele"},"Equal Allele Frequency Between Reference & Alternate Allele"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.2,0.6\n")),(0,l.kt)("p",null,"We will select T to be the global major allele and C to be the global minor allele."),(0,l.kt)("h2",{id:"known-issues"},"Known Issues"),(0,l.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are multiple entries with different CAF values for the same allele, we use the first CAF value."))),(0,l.kt)("h2",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nih.gov/snp/organisms/"},"https://ftp.ncbi.nih.gov/snp/organisms/")),(0,l.kt)("h2",{id:"json-output"},"JSON Output"),(0,l.kt)(r.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/2c853001.0179eacc.js b/assets/js/2c853001.0179eacc.js deleted file mode 100644 index a2385d01..00000000 --- a/assets/js/2c853001.0179eacc.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1500],{71197:e=>{e.exports=JSON.parse('{"pluginId":"default","version":"3.21","label":"3.21","banner":"unmaintained","badge":true,"className":"docs-version-3.21","isLast":false,"docsSidebars":{"docs":[{"type":"category","label":"Introduction","items":[{"type":"link","label":"Introduction","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/","docId":"introduction/introduction"},{"type":"link","label":"Dependencies","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/dependencies","docId":"introduction/dependencies"},{"type":"link","label":"Getting Started","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/getting-started","docId":"introduction/getting-started"},{"type":"link","label":"Parsing Nirvana JSON","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/parsing-json","docId":"introduction/parsing-json"},{"type":"link","label":"Annotating COVID-19","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/covid19","docId":"introduction/covid19"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Data Sources","items":[{"type":"link","label":"1000 Genomes","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/1000Genomes","docId":"data-sources/1000Genomes"},{"type":"link","label":"Amino Acid Conservation","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/amino-acid-conservation","docId":"data-sources/amino-acid-conservation"},{"type":"link","label":"Cancer 
Hotspots","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cancer-hotspots","docId":"data-sources/cancer-hotspots"},{"type":"link","label":"ClinGen","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen","docId":"data-sources/clingen"},{"type":"link","label":"ClinVar","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clinvar","docId":"data-sources/clinvar"},{"type":"link","label":"COSMIC","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cosmic","docId":"data-sources/cosmic"},{"type":"link","label":"DANN","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dann","docId":"data-sources/dann"},{"type":"link","label":"dbSNP","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dbsnp","docId":"data-sources/dbsnp"},{"type":"link","label":"DECIPHER","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/decipher","docId":"data-sources/decipher"},{"type":"link","label":"FusionCatcher","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/fusioncatcher","docId":"data-sources/fusioncatcher"},{"type":"link","label":"GERP","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gerp","docId":"data-sources/gerp"},{"type":"link","label":"GME Variome","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gme","docId":"data-sources/gme"},{"type":"link","label":"gnomAD","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad","docId":"data-sources/gnomad"},{"type":"link","label":"Mitochondrial 
Heteroplasmy","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mito-heteroplasmy","docId":"data-sources/mito-heteroplasmy"},{"type":"link","label":"MITOMAP","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mitomap","docId":"data-sources/mitomap"},{"type":"link","label":"OMIM","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/omim","docId":"data-sources/omim"},{"type":"link","label":"PhyloP","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/phylop","docId":"data-sources/phylop"},{"type":"link","label":"Primate AI","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/primate-ai","docId":"data-sources/primate-ai"},{"type":"link","label":"REVEL","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/revel","docId":"data-sources/revel"},{"type":"link","label":"Splice AI","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/splice-ai","docId":"data-sources/splice-ai"},{"type":"link","label":"TOPMed","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/topmed","docId":"data-sources/topmed"}],"collapsible":true,"collapsed":true},{"type":"category","label":"File Formats","items":[{"type":"link","label":"Nirvana JSON File Format","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/file-formats/nirvana-json-file-format","docId":"file-formats/nirvana-json-file-format"},{"type":"link","label":"Custom Annotations","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/file-formats/custom-annotations","docId":"file-formats/custom-annotations"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Core Functionality","items":[{"type":"link","label":"Canonical Transcripts","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/canonical-transcripts","docId":"core-functionality/canonical-transcripts"},{"type":"link","label":"Gene Fusion 
Detection","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/gene-fusions","docId":"core-functionality/gene-fusions"},{"type":"link","label":"MNV Recomposition","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/mnv-recomposition","docId":"core-functionality/mnv-recomposition"},{"type":"link","label":"Variant IDs","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/variant-ids","docId":"core-functionality/variant-ids"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Utilities","items":[{"type":"link","label":"Jasix","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/utilities/jasix","docId":"utilities/jasix"},{"type":"link","label":"SAUtils","href":"/IlluminaConnectedAnnotationsDocumentation/3.21/utilities/sautils","docId":"utilities/sautils"}],"collapsible":true,"collapsed":true}]},"docs":{"core-functionality/canonical-transcripts":{"id":"core-functionality/canonical-transcripts","title":"Canonical Transcripts","description":"Overview","sidebar":"docs"},"core-functionality/gene-fusions":{"id":"core-functionality/gene-fusions","title":"Gene Fusion Detection","description":"Overview","sidebar":"docs"},"core-functionality/mnv-recomposition":{"id":"core-functionality/mnv-recomposition","title":"MNV Recomposition","description":"Overview","sidebar":"docs"},"core-functionality/variant-ids":{"id":"core-functionality/variant-ids","title":"Variant IDs","description":"Overview","sidebar":"docs"},"data-sources/1000Genomes":{"id":"data-sources/1000Genomes","title":"1000 Genomes","description":"Overview","sidebar":"docs"},"data-sources/1000Genomes-snv-json":{"id":"data-sources/1000Genomes-snv-json","title":"1000Genomes-snv-json","description":"| Field | Type | Notes |"},"data-sources/1000Genomes-sv-json":{"id":"data-sources/1000Genomes-sv-json","title":"1000Genomes-sv-json","description":"| Field | Type | Notes 
|"},"data-sources/amino-acid-conservation":{"id":"data-sources/amino-acid-conservation","title":"Amino Acid Conservation","description":"Overview","sidebar":"docs"},"data-sources/amino-acid-conservation-json":{"id":"data-sources/amino-acid-conservation-json","title":"amino-acid-conservation-json","description":"| Field | Type | Notes |"},"data-sources/cancer-hotspots":{"id":"data-sources/cancer-hotspots","title":"Cancer Hotspots","description":"Overview","sidebar":"docs"},"data-sources/clingen":{"id":"data-sources/clingen","title":"ClinGen","description":"Overview","sidebar":"docs"},"data-sources/clingen-dosage-json":{"id":"data-sources/clingen-dosage-json","title":"clingen-dosage-json","description":"| Field | Type | Notes |"},"data-sources/clingen-gene-validity-json":{"id":"data-sources/clingen-gene-validity-json","title":"clingen-gene-validity-json","description":"| Field | Type | Notes |"},"data-sources/clingen-json":{"id":"data-sources/clingen-json","title":"clingen-json","description":"| Field | Type | Notes |"},"data-sources/clinvar":{"id":"data-sources/clinvar","title":"ClinVar","description":"Overview","sidebar":"docs"},"data-sources/clinvar-json":{"id":"data-sources/clinvar-json","title":"clinvar-json","description":"small variants:"},"data-sources/cosmic":{"id":"data-sources/cosmic","title":"COSMIC","description":"Overview","sidebar":"docs"},"data-sources/cosmic-cancer-gene-census":{"id":"data-sources/cosmic-cancer-gene-census","title":"cosmic-cancer-gene-census","description":"| Field | Type | Notes |"},"data-sources/cosmic-gene-fusion-json":{"id":"data-sources/cosmic-gene-fusion-json","title":"cosmic-gene-fusion-json","description":"| Field | Type | Notes |"},"data-sources/cosmic-json":{"id":"data-sources/cosmic-json","title":"cosmic-json","description":"| Field | Type | Notes 
|"},"data-sources/dann":{"id":"data-sources/dann","title":"DANN","description":"Overview","sidebar":"docs"},"data-sources/dann-json":{"id":"data-sources/dann-json","title":"dann-json","description":"| Field | Type | Notes |"},"data-sources/dbsnp":{"id":"data-sources/dbsnp","title":"dbSNP","description":"Overview","sidebar":"docs"},"data-sources/dbsnp-json":{"id":"data-sources/dbsnp-json","title":"dbsnp-json","description":"| Field | Type | Notes |"},"data-sources/decipher":{"id":"data-sources/decipher","title":"DECIPHER","description":"Overview","sidebar":"docs"},"data-sources/decipher-json":{"id":"data-sources/decipher-json","title":"decipher-json","description":"| Field | Type | Notes |"},"data-sources/fusioncatcher":{"id":"data-sources/fusioncatcher","title":"FusionCatcher","description":"Overview","sidebar":"docs"},"data-sources/fusioncatcher-json":{"id":"data-sources/fusioncatcher-json","title":"fusioncatcher-json","description":"| Field | Type | Notes |"},"data-sources/gerp":{"id":"data-sources/gerp","title":"GERP","description":"Overview","sidebar":"docs"},"data-sources/gerp-json":{"id":"data-sources/gerp-json","title":"gerp-json","description":"| Field | Type | Notes |"},"data-sources/gme":{"id":"data-sources/gme","title":"GME Variome","description":"Overview","sidebar":"docs"},"data-sources/gme-json":{"id":"data-sources/gme-json","title":"gme-json","description":"| Field | Type | Notes |"},"data-sources/gnomad":{"id":"data-sources/gnomad","title":"gnomAD","description":"Overview","sidebar":"docs"},"data-sources/gnomad-lof-json":{"id":"data-sources/gnomad-lof-json","title":"gnomad-lof-json","description":"| Field | Type | Notes |"},"data-sources/gnomad-small-variants-json":{"id":"data-sources/gnomad-small-variants-json","title":"gnomad-small-variants-json","description":"| Field | Type | Notes 
|"},"data-sources/gnomad-structural-variants-data_description":{"id":"data-sources/gnomad-structural-variants-data_description","title":"gnomad-structural-variants-data_description","description":"Bed Example"},"data-sources/gnomad-structural-variants-json":{"id":"data-sources/gnomad-structural-variants-json","title":"gnomad-structural-variants-json","description":"| Field | Type | Notes |"},"data-sources/mito-heteroplasmy":{"id":"data-sources/mito-heteroplasmy","title":"Mitochondrial Heteroplasmy","description":"Overview","sidebar":"docs"},"data-sources/mitomap":{"id":"data-sources/mitomap","title":"MITOMAP","description":"Overview","sidebar":"docs"},"data-sources/mitomap-small-variants-json":{"id":"data-sources/mitomap-small-variants-json","title":"mitomap-small-variants-json","description":"| Field | Type | Notes |"},"data-sources/mitomap-structural-variants-json":{"id":"data-sources/mitomap-structural-variants-json","title":"mitomap-structural-variants-json","description":"| Field | Type | Notes |"},"data-sources/omim":{"id":"data-sources/omim","title":"OMIM","description":"Overview","sidebar":"docs"},"data-sources/omim-json":{"id":"data-sources/omim-json","title":"omim-json","description":"| Field | Type | Notes |"},"data-sources/phylop":{"id":"data-sources/phylop","title":"PhyloP","description":"Overview","sidebar":"docs"},"data-sources/phylop-json":{"id":"data-sources/phylop-json","title":"phylop-json","description":"| Field | Type | Notes |"},"data-sources/primate-ai":{"id":"data-sources/primate-ai","title":"Primate AI","description":"Overview","sidebar":"docs"},"data-sources/primate-ai-json":{"id":"data-sources/primate-ai-json","title":"primate-ai-json","description":"| Field | Type | Notes |"},"data-sources/revel":{"id":"data-sources/revel","title":"REVEL","description":"Overview","sidebar":"docs"},"data-sources/revel-json":{"id":"data-sources/revel-json","title":"revel-json","description":"| Field | Type | Notes 
|"},"data-sources/splice-ai":{"id":"data-sources/splice-ai","title":"Splice AI","description":"Overview","sidebar":"docs"},"data-sources/splice-ai-json":{"id":"data-sources/splice-ai-json","title":"splice-ai-json","description":"| Field | Type | Notes |"},"data-sources/topmed":{"id":"data-sources/topmed","title":"TOPMed","description":"Overview","sidebar":"docs"},"data-sources/topmed-json":{"id":"data-sources/topmed-json","title":"topmed-json","description":"| Field | Type | Notes |"},"file-formats/custom-annotations":{"id":"file-formats/custom-annotations","title":"Custom Annotations","description":"Overview","sidebar":"docs"},"file-formats/nirvana-json-file-format":{"id":"file-formats/nirvana-json-file-format","title":"Nirvana JSON File Format","description":"Overview","sidebar":"docs"},"introduction/covid19":{"id":"introduction/covid19","title":"Annotating COVID-19","description":"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.","sidebar":"docs"},"introduction/dependencies":{"id":"introduction/dependencies","title":"Dependencies","description":"All of the following dependencies have been included in this repository.","sidebar":"docs"},"introduction/getting-started":{"id":"introduction/getting-started","title":"Getting Started","description":"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.","sidebar":"docs"},"introduction/introduction":{"id":"introduction/introduction","title":"Introduction","description":"Clinical-grade variant annotation","sidebar":"docs"},"introduction/parsing-json":{"id":"introduction/parsing-json","title":"Parsing Nirvana JSON","description":"Why JSON?","sidebar":"docs"},"utilities/jasix":{"id":"utilities/jasix","title":"Jasix","description":"Overview","sidebar":"docs"},"utilities/sautils":{"id":"utilities/sautils","title":"SAUtils","description":"Overview","sidebar":"docs"}}}')}}]); \ No newline at end of file diff --git a/assets/js/30773d50.c3e50fab.js b/assets/js/30773d50.c3e50fab.js deleted file mode 100644 index 4d00001f..00000000 --- a/assets/js/30773d50.c3e50fab.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8384],{3905:(e,n,t)=>{t.d(n,{Zo:()=>u,kt:()=>h});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function r(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):r(r({},n),e)),t},u=function(e){var n=c(e.components);return a.createElement(s.Provider,{value:n},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var 
t=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),p=c(t),m=i,h=p["".concat(s,".").concat(m)]||p[m]||d[m]||o;return t?a.createElement(h,r(r({ref:n},u),{},{components:t})):a.createElement(h,r({ref:n},u))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var o=t.length,r=new Array(o);r[0]=m;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[p]="string"==typeof e?e:i,r[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>r,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=t(87462),i=(t(67294),t(3905));const o={title:"Annotating COVID-19"},r=void 0,l={unversionedId:"introduction/covid19",id:"version-3.17/introduction/covid19",title:"Annotating COVID-19",description:"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.",source:"@site/versioned_docs/version-3.17/introduction/covid19.md",sourceDirName:"introduction",slug:"/introduction/covid19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/covid19",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/introduction/covid19.md",tags:[],version:"3.17",frontMatter:{title:"Annotating COVID-19"},sidebar:"version-3.17/docs",previous:{title:"Parsing Nirvana JSON",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/parsing-json"},next:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/1000Genomes"}},s=[{value:"Getting Nirvana",id:"getting-nirvana",children:[],level:2},{value:"Downloading the COVID-19 data files",id:"downloading-the-covid-19-data-files",children:[],level:2},{value:"Download a COVID-19 VCF file",id:"download-a-covid-19-vcf-file",children:[],level:2},{value:"Running 
Nirvana",id:"running-nirvana",children:[],level:2},{value:"Investigating the Results",id:"investigating-the-results",children:[],level:2}],c={toc:s},u="wrapper";function p(e){let{components:n,...t}=e;return(0,i.kt)(u,(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health."),(0,i.kt)("p",null,"However, nothing in our architecture prevents us from supporting other genomes. Earlier this year, we had an opportunity to put that statement to the test - we added support for annotating the ",(0,i.kt)("strong",{parentName:"p"},"SARS-CoV-2")," genome, the virus that causes the ",(0,i.kt)("strong",{parentName:"p"},"COVID-19")," disease."),(0,i.kt)("p",null,"In addition to normal transcript annotation, we also supply:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"allele frequencies"),(0,i.kt)("li",{parentName:"ul"},"protein domains")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"SARS-CoV-2 Galaxy 
Project")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The allele frequencies used by Nirvana were provided by the ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/galaxyproject/SARS-CoV-2"},"SARS-CoV-2 Galaxy Project"),". This is an international effort that provides ongoing analysis of COVID-19 using Galaxy, BioConda, and public research infrastructures."))),(0,i.kt)("h2",{id:"getting-nirvana"},"Getting Nirvana"),(0,i.kt)("p",null,"If you don't have Nirvana already, please consult our ",(0,i.kt)("a",{parentName:"p",href:"getting-started"},"Getting Started")," page first."),(0,i.kt)("h2",{id:"downloading-the-covid-19-data-files"},"Downloading the COVID-19 data files"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip"},"a data zip file")," containing new gene models, reference, and external data sources for SARS-CoV-2:"),(0,i.kt)("p",null,"Just go to the directory that contains your Nirvana ",(0,i.kt)("inlineCode",{parentName:"p"},"Data")," directory."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"cd ~/Nirvana\ncurl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip\nunzip Covid19Data.zip\n")),(0,i.kt)("h2",{id:"download-a-covid-19-vcf-file"},"Download a COVID-19 VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz"},"a COVID-19 VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz\n")),(0,i.kt)("h2",{id:"running-nirvana"},"Running Nirvana"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your 
VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/Nirvana.dll \\\n -c Data/Cache/SARS-CoV-2/SARS-CoV-2 \\\n --sd Data/SupplementaryAnnotation/SARS-CoV-2 \\\n -r Data/References/SARS-CoV-2.ASM985889v3.dat \\\n -i Covid19Mutations.vcf.gz \\\n -o Covid19Mutations\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache prefix"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:00.0\nSA Position Scan 00:00:00.0 1763\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nNC_045512 00:00:00.0 00:00:00.1 173\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:00.0 2.0 %\nPreload 00:00:00.0 0.3 %\nAnnotation 00:00:00.1 6.0 %\n\nTime: 
00:00:01.5\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"Covid19Mutations.json.gz"),". Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.json.gz"},"the full JSON file"),"."),(0,i.kt)("h2",{id:"investigating-the-results"},"Investigating the Results"),(0,i.kt)("p",null,"Here's an example of what a COVID-19 variant looks like in the JSON output:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "chromosome":"NC_045512.2",\n "position":27323,\n "refAllele":"C",\n "altAlleles":[\n "T"\n ],\n "filters":[\n "PASS"\n ],\n "proteinDomains":[\n {\n "start":27202,\n "end":27384,\n "proteinId":"YP_009724394.1",\n "domainId":"cl13556",\n "domainName":"Sars6 super family",\n "reciprocalOverlap":0.00546,\n "annotationOverlap":0.00546\n }\n ],\n "variants":[\n {\n "vid":"NC_045512.2-27323-C-T",\n "chromosome":"NC_045512.2",\n "begin":27323,\n "end":27323,\n "refAllele":"C",\n "altAllele":"T",\n "variantType":"SNV",\n "hgvsg":"NC_045512.2:g.27323C>T",\n "alleleFrequency":{\n "refAllele":"C",\n "altAllele":"T",\n "allAc":8,\n "allAn":1058,\n "allAf":0.007561\n },\n "transcripts":[\n {\n "transcript":"YP_009724394.1",\n "source":"RefSeq",\n "bioType":"protein_coding",\n "codons":"tCt/tTt",\n "aminoAcids":"S/F",\n "cdnaPos":"122",\n "cdsPos":"122",\n "exons":"1/1",\n "proteinPos":"41",\n "geneId":"43740572",\n "hgnc":"ORF6",\n "consequence":[\n "missense_variant"\n ],\n "hgvsc":"YP_009724394.1:c.122C>T",\n "hgvsp":"YP_009724394.1:p.(Ser41Phe)",\n "proteinId":"YP_009724394.1"\n },\n {\n "transcript":"YP_009724395.1",\n "source":"RefSeq",\n "bioType":"protein_coding",\n "geneId":"43740573",\n "hgnc":"ORF7a",\n "consequence":[\n "upstream_gene_variant"\n ],\n "proteinId":"YP_009724395.1"\n }\n ]\n }\n ]\n}\n')))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/318d7070.06855b52.js 
b/assets/js/318d7070.06855b52.js deleted file mode 100644 index de22838e..00000000 --- a/assets/js/318d7070.06855b52.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9836],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=r.createContext({}),l=function(e){var t=r.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(s.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(s,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,i(i({ref:t},p),{},{components:n})):r.createElement(f,i({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=m;var c={};for(var s in t)hasOwnProperty.call(t,s)&&(c[s]=t[s]);c.originalType=e,c[u]="string"==typeof e?e:a,i[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 
0,c={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.16/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],l={toc:s},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} \n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,a.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. 
Range: 0.01 - 1.00")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/3191dbaa.1eb0c04d.js b/assets/js/3191dbaa.1eb0c04d.js deleted file mode 100644 index 2195ff7f..00000000 --- a/assets/js/3191dbaa.1eb0c04d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8065,4091],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>g});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),c=d(n),m=r,g=c["".concat(s,".").concat(m)]||c[m]||u[m]||o;return n?a.createElement(g,i(i({ref:t},p),{},{components:n})):a.createElement(g,i({ref:t},p))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var 
a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/decipher-json",id:"version-3.21/data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/decipher-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n }\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed deletions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"total # of samples")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% annotation overlap")))))}c.isMDXComponent=!0},98723:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(25803);const i={title:"DECIPHER"},l=void 0,s={unversionedId:"data-sources/decipher",id:"version-3.21/data-sources/decipher",title:"DECIPHER",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/decipher.mdx",sourceDirName:"data-sources",slug:"/data-sources/decipher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/decipher",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/decipher.mdx",tags:[],version:"3.21",frontMatter:{title:"DECIPHER"},sidebar:"docs",previous:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dbsnp"},next:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/fusioncatcher"}},d=[{value:"Overview",id:"overview",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[{value:"JSON output",id:"json-output",children:[],level:3}],level:2}],p={toc:d},c="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/"},"DECIPHER")," (DatabasE of genomiC varIation and Phenotype in Humans using Ensembl Resources) is an interactive web-based database which incorporates a suite of tools designed to aid the interpretation of genomic variants."),(0,r.kt)("p",null,"DECIPHER enhances clinical diagnosis by retrieving information from a variety of bioinformatics resources relevant to the variant found in the patient. 
The patient's variant is displayed in the context of both normal variation and pathogenic variation reported at that locus thereby facilitating interpretation."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"DECIPHER: Database of Chromosomal Imbalance and Phenotype in Humans using Ensembl Resources. Firth, H.V. et al., 2009. Am.J.Hum.Genet 84, 524-533 (DOI: dx.doi.org/10/1016/j.ajhg.2009.03.010)"))),(0,r.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#population_cnv_id chr start end deletion_observations deletion_frequency deletion_standard_error duplication_observations duplication_frequency duplication_standard_error observations frequency standard_error type sample_size study\n1 1 10529 177368 0 0 1 3 0.075 0.555277708 3 0.075 0.555277708 1 40 42M calls\n2 1 13516 91073 0 0 1 27 0.675 0.109713431 27 0.675 0.109713431 1 40 42M calls\n3 1 18888 35451 0 0 1 2 0.002366864 0.706269473 2 0.002366864 0.706269473 1 845 DDD\n")),(0,r.kt)("h4",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"We parse the DECIPHER tsv file and extract the following 
columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"chr"),(0,r.kt)("li",{parentName:"ul"},"start"),(0,r.kt)("li",{parentName:"ul"},"end"),(0,r.kt)("li",{parentName:"ul"},"deletion_observations"),(0,r.kt)("li",{parentName:"ul"},"deletion_frequency"),(0,r.kt)("li",{parentName:"ul"},"duplication_observations"),(0,r.kt)("li",{parentName:"ul"},"duplication_frequency"),(0,r.kt)("li",{parentName:"ul"},"sample_size")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz"},"https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz"),"\n",(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/files/downloads/population_cnv_grch37.txt.gz"},"https://www.deciphergenomics.org/files/downloads/population_cnv_grch37.txt.gz")),(0,r.kt)("h3",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/324cae3c.7a90dc43.js b/assets/js/324cae3c.7a90dc43.js deleted file mode 100644 index 0a85aeb0..00000000 --- a/assets/js/324cae3c.7a90dc43.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9742,6974,2439,8808,9836,8947,7138,2520,5096,7043,2154,882,6132,3103,104,2137,4005,1212],{3905:(t,e,a)=>{a.d(e,{Zo:()=>s,kt:()=>k});var n=a(67294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var 
l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),d=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},s=function(t){var e=d(t.components);return n.createElement(p.Provider,{value:e},t.children)},m="mdxType",c={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},g=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,s=o(t,["components","mdxType","originalType","parentName"]),m=d(a),g=r,k=m["".concat(p,".").concat(g)]||m[g]||c[g]||l;return a?n.createElement(k,i(i({ref:e},s),{},{components:a})):n.createElement(k,i({ref:e},s))}));function k(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=g;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[m]="string"==typeof t?t:r,i[1]=o;for(var d=2;d{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.16/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n 
"afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}m.isMDXComponent=!0},74146:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.16/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}m.isMDXComponent=!0},99679:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.16/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,r.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}m.isMDXComponent=!0},53496:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.16/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clingen-dosage-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is 
associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,r.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}m.isMDXComponent=!0},53379:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 
0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.16/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID 
(MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted")))}m.isMDXComponent=!0},86806:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-json",id:"version-3.16/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clingen-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function 
m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). 
Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}m.isMDXComponent=!0},48247:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clinvar-json",id:"version-3.16/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.16/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clinvar-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV 
ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single 
variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}m.isMDXComponent=!0},69841:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.16/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.16/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/dbsnp-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}m.isMDXComponent=!0},81633:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.16/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/gnomad-lof-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function 
m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pLi"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pNull"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pRec"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"synZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"misZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected missense Z 
score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}m.isMDXComponent=!0},87602:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.16/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n 
"maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coverage"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. 
Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}m.isMDXComponent=!0},35270:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.16/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}m.isMDXComponent=!0},96502:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.16/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n 
"annotationOverlap":0.42405\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}m.isMDXComponent=!0},55654:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/omim-json",id:"version-3.16/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/omim-json.md",tags:[],version:"3.16",frontMatter:{}},p=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}m.isMDXComponent=!0},3301:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/primate-ai-json",id:"version-3.16/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/primate-ai-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function 
m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}m.isMDXComponent=!0},91945:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/revel-json",id:"version-3.16/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/revel-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n 
"score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}m.isMDXComponent=!0},94791:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/splice-ai-json",id:"version-3.16/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/splice-ai-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}m.isMDXComponent=!0},42472:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/topmed-json",id:"version-3.16/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/topmed-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}m.isMDXComponent=!0},69871:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>w,default:()=>M,frontMatter:()=>A,metadata:()=>j,toc:()=>I});var n=a(87462),r=(a(67294),a(3905)),l=a(99679),i=a(48247),o=a(86806),p=a(53496),d=a(53379),s=a(69841),m=a(3301),c=a(91945),g=a(94791),k=a(35270),N=a(96502),u=a(87602),f=a(81633),y=a(43853),h=a(74146),v=a(55654),b=a(42472);const A={title:"Nirvana JSON File Format"},w=void 0,j={unversionedId:"file-formats/nirvana-json-file-format",id:"version-3.16/file-formats/nirvana-json-file-format",title:"Nirvana JSON File 
Format",description:"Overview",source:"@site/versioned_docs/version-3.16/file-formats/nirvana-json-file-format.mdx",sourceDirName:"file-formats",slug:"/file-formats/nirvana-json-file-format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/file-formats/nirvana-json-file-format",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/file-formats/nirvana-json-file-format.mdx",tags:[],version:"3.16",frontMatter:{title:"Nirvana JSON File Format"},sidebar:"version-3.16/docs",previous:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/topmed"},next:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/file-formats/custom-annotations"}},I=[{value:"Overview",id:"overview",children:[{value:"Conventions",id:"conventions",children:[],level:3},{value:"JSON Layout",id:"json-layout",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Header",id:"header",children:[{value:"Data Source",id:"data-source",children:[],level:4},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:4}],level:2},{value:"Positions",id:"positions",children:[{value:"ClinGen",id:"clingen",children:[],level:3},{value:"1000 Genomes (SV)",id:"1000-genomes-sv",children:[],level:3},{value:"MITOMAP (SV)",id:"mitomap-sv",children:[],level:3}],level:2},{value:"Samples",id:"samples",children:[],level:2},{value:"Variants",id:"variants",children:[{value:"Transcripts",id:"transcripts",children:[{value:"PolyPhen",id:"polyphen",children:[],level:4},{value:"SIFT",id:"sift",children:[],level:4},{value:"Amino Acid Conservation",id:"amino-acid-conservation",children:[],level:4},{value:"Gene Fusions",id:"gene-fusions",children:[],level:4},{value:"Fusion",id:"fusion",children:[],level:4}],level:3},{value:"Regulatory Regions",id:"regulatory-regions",children:[{value:"Regulatory 
Types",id:"regulatory-types",children:[],level:4},{value:"Regulatory Consequences",id:"regulatory-consequences",children:[],level:4}],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3},{value:"1000 Genomes",id:"1000-genomes",children:[],level:3},{value:"gnomAD",id:"gnomad",children:[],level:3},{value:"dbSNP",id:"dbsnp",children:[],level:3},{value:"MITOMAP",id:"mitomap",children:[],level:3},{value:"Primate AI",id:"primate-ai",children:[],level:3},{value:"REVEL",id:"revel",children:[],level:3},{value:"Splice AI",id:"splice-ai",children:[],level:3},{value:"TOPMed",id:"topmed",children:[],level:3}],level:2},{value:"Genes",id:"genes",children:[{value:"OMIM",id:"omim",children:[],level:3},{value:"gnomAD LoF Gene Metrics",id:"gnomad-lof-gene-metrics",children:[],level:3},{value:"ClinGen Disease Validity",id:"clingen-disease-validity",children:[],level:3}],level:2}],D={toc:I},T="wrapper";function M(t){let{components:e,...A}=t;return(0,r.kt)(T,(0,n.Z)({},D,A,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("h3",{id:"conventions"},"Conventions"),(0,r.kt)("p",null,"In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. As such, we have several conventions that are useful to know about:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display ",(0,r.kt)("inlineCode",{parentName:"li"},'"isStructuralVariant":false')," a few million times when annotating a small variant VCF."),(0,r.kt)("li",{parentName:"ul"},"When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. 
Nirvana treats periods like empty or null strings and therefore will not output those entries.")),(0,r.kt)("h3",{id:"json-layout"},"JSON Layout"),(0,r.kt)("p",null,(0,r.kt)("img",{src:a(48978).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"In general, each position corresponds to a row in the original VCF file."),(0,r.kt)("p",{parentName:"div"},"For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section."))),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"We've put together a ",(0,r.kt)("a",{parentName:"p",href:"../introduction/parsing-json"},"new section that 
discusses how to parse our JSON files")," easily using examples in a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-python.ipynb"},"Python Jupyter notebook")," and a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-r.ipynb"},"R version")," as well. In addition, we have information about how to quickly dump content from our JSON file using a tabix-like utility called JASIX."))),(0,r.kt)("h2",{id:"header"},"Header"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"Nirvana 3.0.0-alpha.5+g6c52e247",\n "creationTime":"2017-06-14 15:53:13",\n "genomeAssembly":"GRCh37",\n "dataSources":[\n {\n "name":"OMIM",\n "version":"unknown",\n "description":"An Online Catalog of Human Genes and Genetic Disorders",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"VEP",\n "version":"84",\n "description":"BothRefSeqAndEnsembl",\n "releaseDate":"2017-01-16"\n },\n {\n "name":"ClinVar",\n "version":"20170503",\n "description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"phyloP",\n "version":"hg19",\n "description":"46 way conservation score between humans and 45 other vertebrates",\n "releaseDate":"2009-11-10"\n }\n ],\n "samples":[\n "NA12878",\n "NA12891",\n "NA12892"\n ]\n 
},\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotator"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the name of the annotator and the current version")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"creationTime"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd hh:mm:ss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genomeAssembly"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#genome-assemblies"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"schemaVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"incremented whenever the core structure of the JSON file introduces breaking changes")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#data-source"},"Data Source entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"samples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the order of these sample names will be used throughout the JSON file when enumerating samples")))),(0,r.kt)("h4",{id:"data-source"},"Data Source"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"version"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"optional description of the data source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"releaseDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")))),(0,r.kt)("h4",{id:"genome-assemblies"},"Genome Assemblies"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"GRCh37"),(0,r.kt)("li",{parentName:"ul"},"GRCh38"),(0,r.kt)("li",{parentName:"ul"},"hg19"),(0,r.kt)("li",{parentName:"ul"},"SARSCoV2")),(0,r.kt)("h2",{id:"positions"},"Positions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"positions":[\n {\n "chromosome":"chr2",\n "position":48010488,\n "repeatUnit":"GGCCCC",\n "refRepeatCount":3,\n "svEnd":48020488,\n "refAllele":"G",\n "altAlleles":[\n "A",\n "GT"\n ],\n "quality":461,\n "filters":[\n "PASS"\n ],\n "ciPos":[\n -170,\n 170\n ],\n "ciEnd":[\n -175,\n 
175\n ],\n "svLength":1000,\n "strandBias":1.23,\n "jointSomaticNormalQuality":29,\n "cytogeneticBand":"2p16.3",\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Variant Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"position"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (1-based notation). 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnit"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refRepeatCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"quality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (Normally an integer, but some variant callers using floating point. 
Has been observed as high as 500k)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"filters"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svLength"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"strandBias"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"small variant"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by GATK (from SB)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"jointSomaticNormalQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by the Manta variant caller (SOMATICSCORE)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cytogeneticBand"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"e.g. 
17p13.1")))),(0,r.kt)("h3",{id:"clingen"},"ClinGen"),(0,r.kt)(o.default,{mdxType:"ClinGen"}),(0,r.kt)(p.default,{mdxType:"ClinGenDosage"}),(0,r.kt)("h3",{id:"1000-genomes-sv"},"1000 Genomes (SV)"),(0,r.kt)(h.default,{mdxType:"ThousandGenomesSV"}),(0,r.kt)("h3",{id:"mitomap-sv"},"MITOMAP (SV)"),(0,r.kt)(N.default,{mdxType:"MitoMapSV"}),(0,r.kt)("h2",{id:"samples"},"Samples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n "totalDepth":57,\n "genotypeQuality":12,\n "copyNumber":3,\n "repeatUnitCounts":[\n 10,\n 20\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "failedFilter":true,\n "splitReadCounts":[\n 10,\n 20\n ],\n "pairedEndReadCounts":[\n 10,\n 20\n ],\n "isDeNovo":true,\n "diseaseAffectedStatuses":[\n "-"\n ],\n "artifactAdjustedQualityScore":89.3,\n "likelihoodRatioQualityScore":78.2,\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotype"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantFrequencies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"totalDepth"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotypeQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values. Typically maxes out at 99")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"copyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnitCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleDepths"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"splitReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pairedEndReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDeNovo"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseaseAffectedStatuses"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"artifactAdjustedQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"likelihoodRatioQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 100. 2 decimal places. 
One value per alternate allele")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Empty Samples")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"If a sample does not contain any entries, we will create a sample object that contains the ",(0,r.kt)("inlineCode",{parentName:"p"},"isEmpty")," key. This ensures that sample ordering is preserved while indicating that a sample is intentionally empty."),(0,r.kt)("pre",{parentName:"div"},(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "isEmpty":true\n }\n],\n')))),(0,r.kt)("h2",{id:"variants"},"Variants"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "isReferenceMinorAllele":true,\n "isStructuralVariant":true,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "isRecomposedVariant":true,\n "linkedVids":["2:48010488:GTA:ATC"],\n "hgvsg":"NC_000002.11:g.48010488G>A",\n 
"phylopScore":0.459\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"vid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"Variant Identifiers"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReferenceMinorAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a reference minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isStructuralVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a structural variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inLowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant lies in a low complexity region (gnomAD low complexity regions)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the reference allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the alternate allele.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"uses\xa0",(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"Sequence Ontology sequence alterations"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the decomposed variant has been used to create another recomposed 
variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isRecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is recomposed from two or more decomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"linkedVids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"list of ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"VIDs")," for variants connecting decomposed and recomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsg"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS g. notation")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phyloP conservation score. Range: -14.08 to 6.424")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Reference Minor Alleles")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Nirvana supports annotating reference minor alleles. 
In such a case, ",(0,r.kt)("inlineCode",{parentName:"p"},"refAllele")," will be replaced by the global major allele and ",(0,r.kt)("inlineCode",{parentName:"p"},"altAllele")," will be replaced with the original reference allele."))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Flagging Decomposed & Recomposed Variants")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isDecomposedVariant":true'),"."),(0,r.kt)("p",{parentName:"div"},"Similarly, the recomposed variant will be shown as a new VCF position. 
This recomposed variant will be flagged with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isRecomposedVariant":true'),"."))),(0,r.kt)("h3",{id:"transcripts"},"Transcripts"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"transcripts":[\n {\n "transcript":"ENST00000445503.1",\n "source":"Ensembl",\n "bioType":"nonsense_mediated_decay",\n "codons":"gGg/gAg",\n "aminoAcids":"G/E",\n "cdnaPos":"268",\n "cdsPos":"116",\n "exons":"1/9",\n "introns":"1/8",\n "proteinPos":"39",\n "geneId":"ENSG00000116062",\n "hgnc":"MSH6",\n "consequence":[\n "missense_variant",\n "NMD_transcript_variant"\n ],\n "hgvsc":"ENST00000445503.1:c.116G>A",\n "hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",\n "geneFusion":{\n "exon":6,\n "intron":5,\n "fusions":[\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",\n "exon":3,\n "intron":2\n },\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",\n "exon":2,\n "intron":1\n }\n ]\n },\n "isCanonical":true,\n "polyPhenScore":0.95,\n "polyPhenPrediction":"probably damaging",\n "proteinId":"ENSP00000405294.1",\n "siftScore":0.61,\n "siftPrediction":"tolerated",\n "completeOverlap":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript ID. e.g. 
ENST00000445503.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"source"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"RefSeq / Ensembl")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,r.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"codons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdnaPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdsPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exons affected by the variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"introns"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"introns affected by the 
variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/obob.cgi"},"Sequence Ontology Consequences"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS protein nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneFusion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#gene-fusions"},"Gene Fusions entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isCanonical"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a canonical 
transcript")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#polyphen"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"protein ID. E.g. ENSP00000405294.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#sift"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"completeOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this transcript is completely overlapped by the variant")))),(0,r.kt)("h4",{id:"polyphen"},"PolyPhen"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"probably damaging"),(0,r.kt)("li",{parentName:"ul"},"possibly damaging"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"unknown")),(0,r.kt)("h4",{id:"sift"},"SIFT"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"tolerated"),(0,r.kt)("li",{parentName:"ul"},"deleterious"),(0,r.kt)("li",{parentName:"ul"},"tolerated - low 
confidence"),(0,r.kt)("li",{parentName:"ul"},"deleterious - low confidence")),(0,r.kt)("h4",{id:"amino-acid-conservation"},"Amino Acid Conservation"),(0,r.kt)(l.default,{mdxType:"AminoAcidConservation"}),(0,r.kt)("h4",{id:"gene-fusions"},"Gene Fusions"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"fusions"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#fusion"},"Fusion entry below"))))),(0,r.kt)("h4",{id:"fusion"},"Fusion"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the other breakpoint was 
located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the other breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature describing the two genes and the transcripts that are fused along with")))),(0,r.kt)("h3",{id:"regulatory-regions"},"Regulatory Regions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"regulatoryRegions":[\n {\n "id":"ENSR00001542175",\n "type":"promoter",\n "consequence":[\n "regulatory_region_variant"\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"type"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-types"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-consequences"},"possible values below"))))),(0,r.kt)("h4",{id:"regulatory-types"},"Regulatory 
Types"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CTCF_binding_site"),(0,r.kt)("li",{parentName:"ul"},"enhancer"),(0,r.kt)("li",{parentName:"ul"},"open_chromatin_region"),(0,r.kt)("li",{parentName:"ul"},"promoter"),(0,r.kt)("li",{parentName:"ul"},"promoter_flanking_region"),(0,r.kt)("li",{parentName:"ul"},"TF_binding_site")),(0,r.kt)("h4",{id:"regulatory-consequences"},"Regulatory Consequences"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"regulatory_region_variant"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_ablation"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_amplification"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_truncation")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)(i.default,{mdxType:"ClinVar"}),(0,r.kt)("h3",{id:"1000-genomes"},"1000 Genomes"),(0,r.kt)(y.default,{mdxType:"ThousandGenomesSmall"}),(0,r.kt)("h3",{id:"gnomad"},"gnomAD"),(0,r.kt)(u.default,{mdxType:"GnomadSmall"}),(0,r.kt)("h3",{id:"dbsnp"},"dbSNP"),(0,r.kt)(s.default,{mdxType:"DbSNP"}),(0,r.kt)("h3",{id:"mitomap"},"MITOMAP"),(0,r.kt)(k.default,{mdxType:"MitoMapSmall"}),(0,r.kt)("h3",{id:"primate-ai"},"Primate AI"),(0,r.kt)(m.default,{mdxType:"PrimateAI"}),(0,r.kt)("h3",{id:"revel"},"REVEL"),(0,r.kt)(c.default,{mdxType:"REVEL"}),(0,r.kt)("h3",{id:"splice-ai"},"Splice AI"),(0,r.kt)(g.default,{mdxType:"SpliceAI"}),(0,r.kt)("h3",{id:"topmed"},"TOPMed"),(0,r.kt)(b.default,{mdxType:"TOPMed"}),(0,r.kt)("h2",{id:"genes"},"Genes"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"genes":[\n {\n "name":"MSH6",\n "hgncId":7329,\n "summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. 
The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",\n /* this is where gene-level data sources can be found e.g. OMIM */\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgncId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"summary"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"short description of the gene from ",(0,r.kt)("a",{parentName:"td",href:"https://www.omim.org/"},"OMIM"))))),(0,r.kt)("h3",{id:"omim"},"OMIM"),(0,r.kt)(v.default,{mdxType:"Omim"}),(0,r.kt)("h3",{id:"gnomad-lof-gene-metrics"},"gnomAD LoF Gene Metrics"),(0,r.kt)(f.default,{mdxType:"GnomadGeneLof"}),(0,r.kt)("h3",{id:"clingen-disease-validity"},"ClinGen Disease Validity"),(0,r.kt)(d.default,{mdxType:"ClinGenDiseaseValidity"}))}M.isMDXComponent=!0},48978:(t,e,a)=>{a.d(e,{Z:()=>n});const n=a.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline 
at end of file diff --git a/assets/js/327ce96f.1126124a.js b/assets/js/327ce96f.1126124a.js deleted file mode 100644 index 9a80555c..00000000 --- a/assets/js/327ce96f.1126124a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5791,525],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),m=d(n),u=l,v=m["".concat(s,".").concat(u)]||m[u]||p[u]||r;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,i=new Array(r);i[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:l,i[1]=o;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 
0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.21/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/dbsnp-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],d={toc:s},c="wrapper";function m(e){let{components:t,...n}=e;return(0,l.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,l.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,l.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,l.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}m.isMDXComponent=!0},49814:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),l=(n(67294),n(3905)),r=n(76707);const i={title:"dbSNP"},o=void 
0,s={unversionedId:"data-sources/dbsnp",id:"version-3.21/data-sources/dbsnp",title:"dbSNP",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/dbsnp.mdx",sourceDirName:"data-sources",slug:"/data-sources/dbsnp",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dbsnp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/dbsnp.mdx",tags:[],version:"3.21",frontMatter:{title:"dbSNP"},sidebar:"docs",previous:{title:"DANN",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dann"},next:{title:"DECIPHER",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/decipher"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Global allele extraction",id:"global-allele-extraction",children:[],level:4},{value:"Equal Allele Frequency Example (2 alleles)",id:"equal-allele-frequency-example-2-alleles",children:[],level:4},{value:"Equal Allele Frequency Example (3 alleles)",id:"equal-allele-frequency-example-3-alleles",children:[],level:4},{value:"Equal Allele Frequency in Alternate Alleles",id:"equal-allele-frequency-in-alternate-alleles",children:[],level:4},{value:"Equal Allele Frequency Between Reference & Alternate Allele",id:"equal-allele-frequency-between-reference--alternate-allele",children:[],level:4}],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},m="wrapper";function p(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"dbSNP contains human single nucleotide variations, microsatellites, and small-scale 
insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP\u2014Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. ",(0,l.kt)("em",{parentName:"p"},"Genome Res."),", ",(0,l.kt)("strong",{parentName:"p"},"9"),", 677\u2013679."))),(0,l.kt)("h2",{id:"vcf-file"},"VCF File"),(0,l.kt)("h3",{id:"example"},"Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 10177 rs367896724 A AC . . 
RS=367896724;RSPOS=10177;dbSNPBuildID=138; \\ \n SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \\\n VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \\\n TOPMED=0.76728147298674821,0.23271852701325178\n")),(0,l.kt)("h3",{id:"parsing"},"Parsing"),(0,l.kt)("p",null,"From the VCF file, we're mainly interested in the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"rsID")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"ID")," field"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"CAF")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"INFO")," field")),(0,l.kt)("h4",{id:"global-allele-extraction"},"Global allele extraction"),(0,l.kt)("p",null,"The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values). 
"),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: Global Major Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele."))),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: 
Global Minor Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily."))),(0,l.kt)("h4",{id:"equal-allele-frequency-example-2-alleles"},"Equal Allele Frequency Example (2 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C CAF=0.5,0.5\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and C to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-example-3-alleles"},"Equal Allele Frequency Example (3 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.33,0.33,0.33\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-in-alternate-alleles"},"Equal Allele Frequency in Alternate Alleles"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.4,0.4\n")),(0,l.kt)("p",null,"We will select C or T to be arbitrarily assigned to be the global major or global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-between-reference--alternate-allele"},"Equal Allele Frequency Between Reference & Alternate Allele"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.2,0.6\n")),(0,l.kt)("p",null,"We will select T to be the global major allele and C to be the global minor allele."),(0,l.kt)("h2",{id:"known-issues"},"Known Issues"),(0,l.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are multiple entries with different CAF values for the same allele, we use the first CAF value."))),(0,l.kt)("h2",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nih.gov/snp/organisms/"},"https://ftp.ncbi.nih.gov/snp/organisms/")),(0,l.kt)("h2",{id:"json-output"},"JSON Output"),(0,l.kt)(r.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/32fefedd.d275c757.js b/assets/js/32fefedd.d275c757.js deleted file mode 100644 index 5a1ce430..00000000 --- a/assets/js/32fefedd.d275c757.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7560,4829],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var 
i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=d(n),u=r,v=p["".concat(s,".").concat(u)]||p[u]||m[u]||i;return n?a.createElement(v,o(o({ref:t},c),{},{components:n})):a.createElement(v,o({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/revel-json",id:"version-3.17/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/revel-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],d={toc:s},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n 
"score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}p.isMDXComponent=!0},74151:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),i=n(33799);const o={title:"REVEL"},l=void 0,s={unversionedId:"data-sources/revel",id:"version-3.17/data-sources/revel",title:"REVEL",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/revel.mdx",sourceDirName:"data-sources",slug:"/data-sources/revel",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/revel",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/revel.mdx",tags:[],version:"3.17",frontMatter:{title:"REVEL"},sidebar:"version-3.17/docs",previous:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/primate-ai"},next:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/splice-ai"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"CSV File",id:"csv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},p="wrapper";function 
m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. ",(0,r.kt)("em",{parentName:"p"},"The American Journal of Human Genetics")," ",(0,r.kt)("strong",{parentName:"p"},"99"),", 877-885 (2016). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1016/j.ajhg.2016.08.016"},"https://doi.org/10.1016/j.ajhg.2016.08.016")))),(0,r.kt)("h2",{id:"csv-file"},"CSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL\n1,35142,35142,G,A,T,M,0.027\n1,35142,35142,G,C,T,R,0.035\n1,35142,35142,G,T,T,K,0.043\n1,35143,35143,T,A,T,S,0.018\n1,35143,35143,T,C,T,A,0.034\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"hg19_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"grch38_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"REVEL"))),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Since the input file contains positions 
for both GRCh37 and GRCh38, we split it into two ",(0,r.kt)("strong",{parentName:"p"},"TSV")," files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file."))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Conflicting Scores")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When there are multiple scores available for the same variant (i.e. 
the same position with the same alternative allele), we pick the highest score."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sites.google.com/site/revelgenomics/downloads"},"https://sites.google.com/site/revelgenomics/downloads")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/3448d4ab.6d5761c3.js b/assets/js/3448d4ab.6d5761c3.js deleted file mode 100644 index 1c921eab..00000000 --- a/assets/js/3448d4ab.6d5761c3.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8947],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var l=r.createContext({}),p=function(e){var t=r.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},s=function(e){var t=p(e.components);return r.createElement(l.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=a,f=u["".concat(l,".").concat(d)]||u[d]||m[d]||o;return n?r.createElement(f,i(i({ref:t},s),{},{components:n})):r.createElement(f,i({ref:t},s))}));function 
f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=d;var c={};for(var l in t)hasOwnProperty.call(t,l)&&(c[l]=t[l]);c.originalType=e,c[u]="string"==typeof e?e:a,i[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/primate-ai-json",id:"version-3.16/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/primate-ai-json.md",tags:[],version:"3.16",frontMatter:{}},l=[],p={toc:l},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No newline at 
end of file diff --git a/assets/js/3495e38c.e665f834.js b/assets/js/3495e38c.e665f834.js deleted file mode 100644 index 11cb2d2b..00000000 --- a/assets/js/3495e38c.e665f834.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4689,2074],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var l=a.createContext({}),c=function(e){var t=a.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(l.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),d=c(n),u=r,v=d["".concat(l,".").concat(u)]||d[u]||m[u]||i;return n?a.createElement(v,o(o({ref:t},p),{},{components:n})):a.createElement(v,o({ref:t},p))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s[d]="string"==typeof e?e:r,o[1]=s;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>s,toc:()=>l});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 
0,s={unversionedId:"data-sources/primate-ai-json",id:"version-3.17/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/primate-ai-json.md",tags:[],version:"3.17",frontMatter:{}},l=[],c={toc:l},p="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}d.isMDXComponent=!0},34224:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>m,frontMatter:()=>o,metadata:()=>l,toc:()=>c});var a=n(87462),r=(n(67294),n(3905)),i=n(18981);const o={title:"Primate AI"},s=void 0,l={unversionedId:"data-sources/primate-ai",id:"version-3.17/data-sources/primate-ai",title:"Primate 
AI",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/primate-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/primate-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/primate-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/primate-ai.mdx",tags:[],version:"3.17",frontMatter:{title:"Primate AI"},sidebar:"version-3.17/docs",previous:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/phylop"},next:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/revel"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"TSV File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Converting UCSC IDs",id:"converting-ucsc-ids",children:[],level:3},{value:"Running the Pre-Processor",id:"running-the-pre-processor",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],p={toc:c},d="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations. 
The method is described in the publication:"),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. ",(0,r.kt)("em",{parentName:"p"},"Nat Genet")," ",(0,r.kt)("strong",{parentName:"p"},"50"),", 1161\u20131170 (2018). ",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/s41588-018-0167-z"},"https://doi.org/10.1038/s41588-018-0167-z")))),(0,r.kt)("h2",{id:"tsv-file"},"TSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr pos ref alt refAA altAA strand_1pos_0neg trinucleotide_context UCSC_gene ExAC_coverage primateDL_score\nchr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239\nchr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the TSV file, we're mainly interested in the following 
columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"primateDL_score"))),(0,r.kt)("p",null,"We also use ",(0,r.kt)("inlineCode",{parentName:"p"},"UCSC_gene")," to filter out variants that don't have matching gene models in Nirvana."),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"converting-ucsc-ids"},"Converting UCSC IDs"),(0,r.kt)("p",null,"Primate AI only provides UCSC IDs. As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs."),(0,r.kt)("p",null,"The following queries are used to download the conversions from UCSC:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},'mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv\n\nmysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \\\n hg19 > ucsc_ensembl.tsv\n')),(0,r.kt)("h3",{id:"running-the-pre-processor"},"Running the Pre-Processor"),(0,r.kt)("p",null,"The Primate AI pre-processor can be run as follows:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \\\n ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz\n")),(0,r.kt)("p",null,"During conversion, 0.5% of the UCSC Ids cannot be converted to either Entrez or Ensembl gene IDs. 
Once the gene IDs have been acquired, we check to see which are available in Nirvana."),(0,r.kt)("p",null,"The following Entrez Gene IDs were not found:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"399753\n401980\n504189\n504191\n100293534\n")),(0,r.kt)("p",null,"Here is the output from the pre-processor:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.\n- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.\n- loading UGA gene ID to gene dictionary... 103,277 genes loaded.\n- parsing Primate AI variants... 70,121,953 variants parsed.\n \n# variants with unknown gene ID: 27,253 / 70,121,953\n# genes with unknown gene ID: 109 / 19,614\n \n# variants not in UGA: 2,036 / 70,121,953\n# genes not in UGA: 6 / 19,614\n")),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"The Primate AI data set provides raw scores, but the scores are biased according to gene context. I.e. 
a 0.4 means something different in ",(0,r.kt)("inlineCode",{parentName:"p"},"TP53")," than it does in ",(0,r.kt)("inlineCode",{parentName:"p"},"KRAS"),"."),(0,r.kt)("p",{parentName:"div"},"As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. According to their research, the 25",(0,r.kt)("sup",null,"th")," percentile is a good proxy for benign variants and the 75",(0,r.kt)("sup",null,"th")," percentile is a good proxy for pathogenic variants."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/cPgCSmecvhb4"},"https://basespace.illumina.com/s/cPgCSmecvhb4")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/34e55124.95f30f74.js b/assets/js/34e55124.95f30f74.js new file mode 100644 index 00000000..e045a1f9 --- /dev/null +++ b/assets/js/34e55124.95f30f74.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7942],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>u});var r=n(7294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var c=r.createContext({}),p=function(t){var e=r.useContext(c),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=p(t.components);return 
r.createElement(c.Provider,{value:e},t.children)},d="mdxType",s={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},f=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,i=t.originalType,c=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),d=p(n),f=a,u=d["".concat(c,".").concat(f)]||d[f]||s[f]||i;return n?r.createElement(u,o(o({ref:e},m),{},{components:n})):r.createElement(u,o({ref:e},m))}));function u(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var i=n.length,o=new Array(i);o[0]=f;var l={};for(var c in e)hasOwnProperty.call(e,c)&&(l[c]=e[c]);l.originalType=t,l[d]="string"==typeof t?t:a,o[1]=l;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>c});var r=n(7462),a=(n(7294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/primate-ai-json",id:"data-sources/primate-ai-json",title:"primate-ai-json",description:"GRCh38",source:"@site/docs/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/primate-ai-json.md",tags:[],version:"current",frontMatter:{}},c=[{value:"GRCh38",id:"grch38",children:[],level:4},{value:"GRCh37",id:"grch37",children:[],level:4}],p={toc:c},m="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(m,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("h4",{id:"grch38"},"GRCh38"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI-3D": [\n {\n "aminoAcidPosition": 2,\n "refAminoAcid": "V",\n "altAminoAcid": "M",\n "score": 0.616944,\n "scorePercentile": 0.52,\n "ensemblTranscriptId": "ENST00000335137.4",\n "refSeqTranscriptId": "NM_001005484.1"\n 
}\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"aminoAcidPosition"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Amino Acid Position (1-based)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"refAminoAcid"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Reference Amino Acid")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"altAminoAcid"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Alternate Amino Acid")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"ensemblTranscriptId"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Transcript ID (Ensembl)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"refSeqTranscriptId"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Transcript ID (RefSeq)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"score"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 
1.0")))),(0,a.kt)("h4",{id:"grch37"},"GRCh37"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI": [\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGNC Gene Symbol")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/34e55124.f49d17fb.js b/assets/js/34e55124.f49d17fb.js deleted file mode 100644 index fb3d4ae2..00000000 --- a/assets/js/34e55124.f49d17fb.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7942],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>u});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var c=r.createContext({}),p=function(t){var e=r.useContext(c),n=e;return t&&(n="function"==typeof 
t?t(e):o(o({},e),t)),n},m=function(t){var e=p(t.components);return r.createElement(c.Provider,{value:e},t.children)},d="mdxType",s={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},f=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,i=t.originalType,c=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),d=p(n),f=a,u=d["".concat(c,".").concat(f)]||d[f]||s[f]||i;return n?r.createElement(u,o(o({ref:e},m),{},{components:n})):r.createElement(u,o({ref:e},m))}));function u(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var i=n.length,o=new Array(i);o[0]=f;var l={};for(var c in e)hasOwnProperty.call(e,c)&&(l[c]=e[c]);l.originalType=t,l[d]="string"==typeof t?t:a,o[1]=l;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>c});var r=n(87462),a=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/primate-ai-json",id:"data-sources/primate-ai-json",title:"primate-ai-json",description:"GRCh38",source:"@site/docs/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/primate-ai-json.md",tags:[],version:"current",frontMatter:{}},c=[{value:"GRCh38",id:"grch38",children:[],level:4},{value:"GRCh37",id:"grch37",children:[],level:4}],p={toc:c},m="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(m,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("h4",{id:"grch38"},"GRCh38"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI-3D": [\n {\n "aminoAcidPosition": 2,\n "refAminoAcid": "V",\n "altAminoAcid": "M",\n "score": 0.616944,\n "scorePercentile": 0.52,\n "ensemblTranscriptId": "ENST00000335137.4",\n "refSeqTranscriptId": 
"NM_001005484.1"\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"aminoAcidPosition"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Amino Acid Position (1-based)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"refAminoAcid"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Reference Amino Acid")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"altAminoAcid"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Alternate Amino Acid")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"ensemblTranscriptId"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Transcript ID (Ensembl)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"refSeqTranscriptId"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Transcript ID (RefSeq)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"score"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 
1.0")))),(0,a.kt)("h4",{id:"grch37"},"GRCh37"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI": [\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGNC Gene Symbol")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/3511c199.3afcdea8.js b/assets/js/3511c199.3afcdea8.js deleted file mode 100644 index c3913274..00000000 --- a/assets/js/3511c199.3afcdea8.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4934],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>m});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof 
e?e(t):o(o({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=c(n),h=r,m=d["".concat(s,".").concat(h)]||d[h]||u[h]||i;return n?a.createElement(m,o(o({ref:t},p),{},{components:n})):a.createElement(m,o({ref:t},p))}));function m(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={title:"Canonical Transcripts"},o=void 0,l={unversionedId:"core-functionality/canonical-transcripts",id:"version-3.18/core-functionality/canonical-transcripts",title:"Canonical Transcripts",description:"Overview",source:"@site/versioned_docs/version-3.18/core-functionality/canonical-transcripts.md",sourceDirName:"core-functionality",slug:"/core-functionality/canonical-transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/canonical-transcripts",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/core-functionality/canonical-transcripts.md",tags:[],version:"3.18",frontMatter:{title:"Canonical Transcripts"},sidebar:"docs",previous:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/file-formats/custom-annotations"},next:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/gene-fusions"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Known 
Algorithms",id:"known-algorithms",children:[{value:"UCSC",id:"ucsc",children:[],level:3},{value:"Ensembl",id:"ensembl",children:[],level:3},{value:"ACMG",id:"acmg",children:[],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3}],level:2},{value:"Unified Approach",id:"unified-approach",children:[],level:2}],c={toc:s},p="wrapper";function d(e){let{components:t,...i}=e;return(0,r.kt)(p,(0,a.Z)({},c,i,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"One of the more polarizing topics within annotation is the notion of canonical transcripts. Because of alternative splicing, we often have several transcripts for each gene. In the human genome, there are an average of 3.4 transcripts per gene (Tung, 2020). As scientists, we seem to have a need for identifying a representative example of a gene - even if there's no biological basis for the motivation."),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(31099).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Golden Helix Blog")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"A few years ago, the guys over at Golden Helix wrote an excellent post about the pitfalls and issues surrounding the identification of canonical transcripts: ",(0,r.kt)("a",{parentName:"p",href:"https://blog.goldenhelix.com/whats-in-a-name-the-intricacies-of-identifying-variants/"},"What\u2019s in a Name: The Intricacies of 
Identifying Variants"),"."))),(0,r.kt)("p",null,"In Nirvana, we wanted to identify an algorithm for determining the canonical transcript and apply it consistently to all of our transcript data sources."),(0,r.kt)("h2",{id:"known-algorithms"},"Known Algorithms"),(0,r.kt)("h3",{id:"ucsc"},"UCSC"),(0,r.kt)("p",null,"UCSC publishes a list of canonical transcripts in its ",(0,r.kt)("inlineCode",{parentName:"p"},"knownCanonical")," table which is available via the ",(0,r.kt)("a",{parentName:"p",href:"https://genome.ucsc.edu/cgi-bin/hgTables"},"TableBrowser"),". Of the RefSeq data sources, it was the only one we could find that provided canonical transcripts:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is defined as either the longest CDS, if the gene has translated transcripts, or the longest cDNA.")),(0,r.kt)("p",null,"If you were to implement this and compare it with the knownCanonical table, you would see a lot of exceptions to the rule."),(0,r.kt)("h3",{id:"ensembl"},"Ensembl"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"http://uswest.ensembl.org/Help/Glossary"},"Ensembl glossary")," states:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is used in the gene tree analysis in Ensembl and does not necessarily reflect the most biologically relevant transcript of a gene. 
For human, the canonical transcript for a gene is set according to the following hierarchy:"),(0,r.kt)("ol",{parentName:"blockquote"},(0,r.kt)("li",{parentName:"ol"},"Longest CCDS translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (1), choose the longest Ensembl/Havana merged translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (2), choose the longest translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no translation, choose the longest non-protein-coding transcript."))),(0,r.kt)("h3",{id:"acmg"},"ACMG"),(0,r.kt)("p",null,"From the ACMG Guidelines for the Interpretation of Sequence Variants:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"A reference transcript for each gene should be used and provided in the report when describing coding variants. The transcript should represent either the longest known transcript and/or the most clinically relevant transcript.")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)("p",null,"From the ClinVar paper:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"When there are multiple transcripts for a gene, ClinVar selects one HGVS expression to construct a preferred name. 
By default, this selection is based on the first reference standard transcript identified by the RefSeqGene/LRG (Locus Reference Genomic) collaboration.")),(0,r.kt)("h2",{id:"unified-approach"},"Unified Approach"),(0,r.kt)("p",null,"Our approach is almost identical to the one Golden Helix discussed in their article:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"If we're looking at RefSeq, only consider NM & NR transcripts as candidates for canonical transcripts."),(0,r.kt)("li",{parentName:"ol"},"Sort the transcripts in the following order:",(0,r.kt)("ol",{parentName:"li"},(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://www.lrg-sequence.org/"},"Locus Reference Genomic (LRG)")," entries occur before non-LRG entries"),(0,r.kt)("li",{parentName:"ol"},"Descending CDS length"),(0,r.kt)("li",{parentName:"ol"},"Descending transcript length"),(0,r.kt)("li",{parentName:"ol"},"Ascending accession number"))),(0,r.kt)("li",{parentName:"ol"},"Grab the first entry")))}d.isMDXComponent=!0},31099:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/hk1-transcripts-a5b85474d3b002553687715dbd004907.png"}}]); \ No newline at end of file diff --git a/assets/js/351f3248.a1275a8e.js b/assets/js/351f3248.a1275a8e.js deleted file mode 100644 index 389ce460..00000000 --- a/assets/js/351f3248.a1275a8e.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7138],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var 
l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var s=a.createContext({}),p=function(t){var e=a.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=p(t.components);return a.createElement(s.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,s=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),c=p(n),u=r,f=c["".concat(s,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(f,o(o({ref:e},m),{},{components:n})):a.createElement(f,o({ref:e},m))}));function f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=u;var i={};for(var s in e)hasOwnProperty.call(e,s)&&(i[s]=e[s]);i.originalType=t,i[c]="string"==typeof t?t:r,o[1]=i;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>l,metadata:()=>i,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.16/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],p={toc:s},m="wrapper";function 
c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated 
diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/3632da51.bc204372.js b/assets/js/3632da51.bc204372.js deleted file mode 100644 index 
742946f4..00000000 --- a/assets/js/3632da51.bc204372.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8985,5862,9482],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>N});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},u=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},c=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,p=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),m=s(n),c=r,N=m["".concat(p,".").concat(c)]||m[c]||d[c]||l;return n?a.createElement(N,o(o({ref:t},u),{},{components:n})):a.createElement(N,o({ref:t},u))}));function N(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[m]="string"==typeof e?e:r,o[1]=i;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.21/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type 
| Notes |",source:"@site/versioned_docs/version-3.21/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. Non-zero integer.")))))}m.isMDXComponent=!0},12146:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.21/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}m.isMDXComponent=!0},26740:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>p,default:()=>c,frontMatter:()=>i,metadata:()=>s,toc:()=>u});var a=n(87462),r=(n(67294),n(3905)),l=n(3952),o=n(12146);const i={title:"1000 Genomes"},p=void 0,s={unversionedId:"data-sources/1000Genomes",id:"version-3.21/data-sources/1000Genomes",title:"1000 Genomes",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/1000Genomes.mdx",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/1000Genomes",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/1000Genomes.mdx",tags:[],version:"3.21",frontMatter:{title:"1000 Genomes"},sidebar:"docs",previous:{title:"Annotating COVID-19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/covid19"},next:{title:"Amino Acid Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/amino-acid-conservation"}},u=[{value:"Overview",id:"overview",children:[],level:2},{value:"Populations",id:"populations",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing",children:[{value:"Conflict Resolution",id:"conflict-resolution",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Structural Variants",id:"structural-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing-1",children:[],level:3},{value:"Converting VCF svTypes to SO sequence 
alterations",id:"converting-vcf-svtypes-to-so-sequence-alterations",children:[{value:"Exceptions",id:"exceptions",children:[],level:4}],level:3}],level:2},{value:"JSON Output",id:"json-output-1",children:[],level:2}],m={toc:u},d="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. ",(0,r.kt)("em",{parentName:"p"},"Nature 526"),", 75\u201381 (2015). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/nature15394"},"https://doi.org/10.1038/nature15394")))),(0,r.kt)("h2",{id:"populations"},"Populations"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"The super population membership can be found here: (",(0,r.kt)("a",{parentName:"li",href:"http://www.1000genomes.org/category/population/"},"http://www.1000genomes.org/category/population/"),")"),(0,r.kt)("li",{parentName:"ul"},"We want to capture the allele frequencies for all 26 populations as well as the 5 super populations and the total population.")),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing"},"VCF File Parsing"),(0,r.kt)("p",null,"The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. 
Our team converted the original data to VCF entries with allele counts and allele numbers like the following."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633\n")),(0,r.kt)("p",null,"The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored)."),(0,r.kt)("p",null,"We parse the VCF file and extract the following fields from INFO:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"AA"),(0,r.kt)("li",{parentName:"ul"},"AC"),(0,r.kt)("li",{parentName:"ul"},"AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AN"),(0,r.kt)("li",{parentName:"ul"},"AMR_AN"),(0,r.kt)("li",{parentName:"ul"},"AFR_AN"),(0,r.kt)("li",{parentName:"ul"},"EUR_AN"),(0,r.kt)("li",{parentName:"ul"},"SAS_AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AC"),(0,r.kt)("li",{parentName:"ul"},"AMR_AC"),(0,r.kt)("li",{parentName:"ul"},"AFR_AC"),(0,r.kt)("li",{parentName:"ul"},"EUR_AC"),(0,r.kt)("li",{parentName:"ul"},"SAS_AC")),(0,r.kt)("h4",{id:"conflict-resolution"},"Conflict Resolution"),(0,r.kt)("p",null,"We have observed conflicting allele frequency information in the source. Take the following example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;\n1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;\n")),(0,r.kt)("p",null,"That is, the variant 1-20505705-C-CTG has conflicting entries. 
To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Chromosome"),(0,r.kt)("th",{parentName:"tr",align:"left"},"#"," of alleles"),(0,r.kt)("th",{parentName:"tr",align:"center"},"#"," of conflicting alleles"),(0,r.kt)("th",{parentName:"tr",align:"left"},"percentage"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"chrX"),(0,r.kt)("td",{parentName:"tr",align:"left"},"834800"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2733"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.33%")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"Total"),(0,r.kt)("td",{parentName:"tr",align:"left"},"21413098"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2743"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.013%")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Currently"),", we removed the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep allele frequencies of all other alleles in the VCF line."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Potential Alternate Solutions")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Remove all alleles that are contained in the vcf lines which have conflicting allele. 
(Recommended by 1000 genome group Holly Zheng-Bradley, 7/29/2015)"),(0,r.kt)("li",{parentName:"ul"},"Recalculate the allele frequency for the conflicting allele."),(0,r.kt)("li",{parentName:"ul"},"Pick the allele frequency that has the highest data support.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/"},"GRCh37"),"\n",(0,r.kt)("a",{parentName:"p",href:"http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/"},"GRCh38")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSONSNV"}),(0,r.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing-1"},"VCF File Parsing"),(0,r.kt)("p",null,"The VCF files contain entries like the following:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A ,,, 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4\n")),(0,r.kt)("p",null,"Please note that, CNVs are allele-specific. 
For example, HG00096 is effectively copy number 4, which would be a net gain on chr22."),(0,r.kt)("p",null,"1000 Genomes contains 5 types of structural variants:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CNV"),(0,r.kt)("li",{parentName:"ul"},"DEL"),(0,r.kt)("li",{parentName:"ul"},"DUP"),(0,r.kt)("li",{parentName:"ul"},"INS"),(0,r.kt)("li",{parentName:"ul"},"INV")),(0,r.kt)("p",null,"Since data of 1000 genomes is provided in VCF format, we assume that the coordinates follow the vcf format, i.e., there is a padding base for symbolic alleles. So all the interval can be interpreted as ","[BEGIN+1, END]",".\nSimilarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Insertion issues")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"END = BEGIN for 6/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+2 for 93/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+3 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+4 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END \u2013 BEGIN range from 5 to 1156 for others.")),(0,r.kt)("h3",{id:"converting-vcf-svtypes-to-so-sequence-alterations"},"Converting VCF svTypes to SO sequence alterations"),(0,r.kt)("p",null,"The svType will be captured in our JSON file under the ",(0,r.kt)("a",{parentName:"p",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"sequenceAlteration")," key. 
Here's the translation we'll use according to svType in 1000 Genomes."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"svType"),(0,r.kt)("th",{parentName:"tr",align:null},"Alternative Alleles contain "),(0,r.kt)("th",{parentName:"tr",align:null},"sequenceAlteration"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"ALU"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DUP"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"CNV"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain (observed_gains >0 and observed_losses =0) ",(0,r.kt)("br",null),"copy_number_loss\xa0(observed_gains = 0 and observed_losses > 0) ",(0,r.kt)("br",null),"copy_number_variation 
(otherwise)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DEL"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_loss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"LINE1"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"SVA"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INV"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"inversion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INS"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"insertion")))),(0,r.kt)("h4",{id:"exceptions"},"Exceptions"),(0,r.kt)("p",null,(0,r.kt)("em",{parentName:"p"},"We discard structural variants without END")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n21 9495848 esv3646347 A 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0\n")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"CNVs in chrY")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"No other types of structural variants exist in chrY"),(0,r.kt)("li",{parentName:"ul"},'Since copy number is provided in genotype field, we directly parse the copy number from "CN" field.'),(0,r.kt)("li",{parentName:"ul"},"For most CNVs in chrY, the reference copy 
number is 1, but the refence number for CNVs in segmental duplication sites is 2 ("," in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG00105 HG00107 HG00108\nY 2888555 CNV_Y_2888555_3014661 T 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394\nY 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C , 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99\n")),(0,r.kt)("h2",{id:"json-output-1"},"JSON Output"),(0,r.kt)(o.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/37ff1bb7.14294456.js b/assets/js/37ff1bb7.14294456.js deleted file mode 100644 index 6af18d6e..00000000 --- a/assets/js/37ff1bb7.14294456.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7454],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>g});var a=n(67294);function r(t,e,n){return e in 
t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},m=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,c=i(t,["components","mdxType","originalType","parentName"]),s=u(n),m=r,g=s["".concat(p,".").concat(m)]||s[m]||d[m]||l;return n?a.createElement(g,o(o({ref:e},c),{},{components:n})):a.createElement(g,o({ref:e},c))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=m;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/clingen-json",id:"version-3.18/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.18/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clingen-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],u={toc:p},c="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(c,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object 
array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). 
Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/389cb7c6.82e0aea0.js b/assets/js/389cb7c6.82e0aea0.js deleted file mode 100644 index 9fd2f158..00000000 --- a/assets/js/389cb7c6.82e0aea0.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[357],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var 
r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var l=r.createContext({}),p=function(e){var t=r.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},s=function(e){var t=p(e.components);return r.createElement(l.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=a,f=u["".concat(l,".").concat(d)]||u[d]||m[d]||o;return n?r.createElement(f,i(i({ref:t},s),{},{components:n})):r.createElement(f,i({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=d;var c={};for(var l in t)hasOwnProperty.call(t,l)&&(c[l]=t[l]);c.originalType=e,c[u]="string"==typeof e?e:a,i[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/primate-ai-json",id:"version-3.14/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.14/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/primate-ai-json.md",tags:[],version:"3.14",frontMatter:{}},l=[],p={toc:l},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/38fc4dd2.9cd589e0.js b/assets/js/38fc4dd2.9cd589e0.js deleted file mode 100644 index 399eb057..00000000 --- a/assets/js/38fc4dd2.9cd589e0.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8345,3514,7859,5938,6766,8244,8841,4592,2074,5919,3966,4291,4829,3476,3460,2218,5146,8823],{3905:(t,e,a)=>{a.d(e,{Zo:()=>s,kt:()=>N});var n=a(67294);function r(t,e,a){return e in 
t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),d=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},s=function(t){var e=d(t.components);return n.createElement(p.Provider,{value:e},t.children)},m="mdxType",c={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},g=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,s=o(t,["components","mdxType","originalType","parentName"]),m=d(a),g=r,N=m["".concat(p,".").concat(g)]||m[g]||c[g]||l;return a?n.createElement(N,i(i({ref:e},s),{},{components:a})):n.createElement(N,i({ref:e},s))}));function N(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=g;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[m]="string"==typeof t?t:r,i[1]=o;for(var d=2;d{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.17/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.17/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. Non-zero integer.")))))}m.isMDXComponent=!0},96351:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.17/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}m.isMDXComponent=!0},22027:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.17/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,r.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}m.isMDXComponent=!0},81474:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.17/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clingen-dosage-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is 
associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,r.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}m.isMDXComponent=!0},1890:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 
0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.17/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID 
(MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted")))}m.isMDXComponent=!0},35295:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-json",id:"version-3.17/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clingen-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function 
m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). 
Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}m.isMDXComponent=!0},69487:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clinvar-json",id:"version-3.17/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.17/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clinvar-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV 
ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single 
variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}m.isMDXComponent=!0},22384:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.17/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.17/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/dbsnp-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}m.isMDXComponent=!0},84094:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.17/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/gnomad-lof-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function 
m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pLi"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pNull"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pRec"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"synZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"misZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected missense Z 
score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}m.isMDXComponent=!0},56249:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.17/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n 
"maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coverage"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. 
Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}m.isMDXComponent=!0},95584:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.17/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}m.isMDXComponent=!0},73356:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.17/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n 
"annotationOverlap":0.42405\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}m.isMDXComponent=!0},55074:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/omim-json",id:"version-3.17/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/omim-json.md",tags:[],version:"3.17",frontMatter:{}},p=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}m.isMDXComponent=!0},18981:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/primate-ai-json",id:"version-3.17/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/primate-ai-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function 
m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}m.isMDXComponent=!0},33799:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/revel-json",id:"version-3.17/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/revel-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n 
"score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}m.isMDXComponent=!0},52629:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/splice-ai-json",id:"version-3.17/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/splice-ai-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}m.isMDXComponent=!0},63745:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/topmed-json",id:"version-3.17/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/topmed-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],d={toc:p},s="wrapper";function m(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}m.isMDXComponent=!0},19746:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>w,default:()=>M,frontMatter:()=>A,metadata:()=>j,toc:()=>D});var n=a(87462),r=(a(67294),a(3905)),l=a(22027),i=a(69487),o=a(35295),p=a(81474),d=a(1890),s=a(22384),m=a(18981),c=a(33799),g=a(52629),N=a(95584),k=a(73356),u=a(56249),f=a(84094),y=a(84517),h=a(96351),v=a(55074),b=a(63745);const A={title:"Nirvana JSON File Format"},w=void 0,j={unversionedId:"file-formats/nirvana-json-file-format",id:"version-3.17/file-formats/nirvana-json-file-format",title:"Nirvana JSON File 
Format",description:"Overview",source:"@site/versioned_docs/version-3.17/file-formats/nirvana-json-file-format.mdx",sourceDirName:"file-formats",slug:"/file-formats/nirvana-json-file-format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/file-formats/nirvana-json-file-format",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/file-formats/nirvana-json-file-format.mdx",tags:[],version:"3.17",frontMatter:{title:"Nirvana JSON File Format"},sidebar:"version-3.17/docs",previous:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/topmed"},next:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/file-formats/custom-annotations"}},D=[{value:"Overview",id:"overview",children:[{value:"Conventions",id:"conventions",children:[],level:3},{value:"JSON Layout",id:"json-layout",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Header",id:"header",children:[{value:"Data Source",id:"data-source",children:[],level:4},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:4}],level:2},{value:"Positions",id:"positions",children:[{value:"ClinGen",id:"clingen",children:[],level:3},{value:"1000 Genomes (SV)",id:"1000-genomes-sv",children:[],level:3},{value:"MITOMAP (SV)",id:"mitomap-sv",children:[],level:3}],level:2},{value:"Samples",id:"samples",children:[],level:2},{value:"Variants",id:"variants",children:[{value:"Transcripts",id:"transcripts",children:[{value:"PolyPhen",id:"polyphen",children:[],level:4},{value:"SIFT",id:"sift",children:[],level:4},{value:"Amino Acid Conservation",id:"amino-acid-conservation",children:[],level:4},{value:"Gene Fusions",id:"gene-fusions",children:[],level:4},{value:"Fusion",id:"fusion",children:[],level:4}],level:3},{value:"Regulatory Regions",id:"regulatory-regions",children:[{value:"Regulatory 
Types",id:"regulatory-types",children:[],level:4},{value:"Regulatory Consequences",id:"regulatory-consequences",children:[],level:4}],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3},{value:"1000 Genomes",id:"1000-genomes",children:[],level:3},{value:"gnomAD",id:"gnomad",children:[],level:3},{value:"dbSNP",id:"dbsnp",children:[],level:3},{value:"MITOMAP",id:"mitomap",children:[],level:3},{value:"Primate AI",id:"primate-ai",children:[],level:3},{value:"REVEL",id:"revel",children:[],level:3},{value:"Splice AI",id:"splice-ai",children:[],level:3},{value:"TOPMed",id:"topmed",children:[],level:3}],level:2},{value:"Genes",id:"genes",children:[{value:"OMIM",id:"omim",children:[],level:3},{value:"gnomAD LoF Gene Metrics",id:"gnomad-lof-gene-metrics",children:[],level:3},{value:"ClinGen Disease Validity",id:"clingen-disease-validity",children:[],level:3}],level:2}],I={toc:D},T="wrapper";function M(t){let{components:e,...A}=t;return(0,r.kt)(T,(0,n.Z)({},I,A,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("h3",{id:"conventions"},"Conventions"),(0,r.kt)("p",null,"In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. As such, we have several conventions that are useful to know about:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display ",(0,r.kt)("inlineCode",{parentName:"li"},'"isStructuralVariant":false')," a few million times when annotating a small variant VCF."),(0,r.kt)("li",{parentName:"ul"},"When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. 
Nirvana treats periods like empty or null strings and therefore will not output those entries.")),(0,r.kt)("h3",{id:"json-layout"},"JSON Layout"),(0,r.kt)("p",null,(0,r.kt)("img",{src:a(65709).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"In general, each position corresponds to a row in the original VCF file."),(0,r.kt)("p",{parentName:"div"},"For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section."))),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"We've put together a ",(0,r.kt)("a",{parentName:"p",href:"../introduction/parsing-json"},"new section that 
discusses how to parse our JSON files")," easily using examples in a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-python.ipynb"},"Python Jupyter notebook")," and a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-r.ipynb"},"R version")," as well. In addition, we have information about how to quickly dump content from our JSON file using a tabix-like utility called JASIX."))),(0,r.kt)("h2",{id:"header"},"Header"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"Nirvana 3.0.0-alpha.5+g6c52e247",\n "creationTime":"2017-06-14 15:53:13",\n "genomeAssembly":"GRCh37",\n "dataSources":[\n {\n "name":"OMIM",\n "version":"unknown",\n "description":"An Online Catalog of Human Genes and Genetic Disorders",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"VEP",\n "version":"84",\n "description":"BothRefSeqAndEnsembl",\n "releaseDate":"2017-01-16"\n },\n {\n "name":"ClinVar",\n "version":"20170503",\n "description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"phyloP",\n "version":"hg19",\n "description":"46 way conservation score between humans and 45 other vertebrates",\n "releaseDate":"2009-11-10"\n }\n ],\n "samples":[\n "NA12878",\n "NA12891",\n "NA12892"\n ]\n 
},\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotator"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the name of the annotator and the current version")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"creationTime"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd hh:mm:ss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genomeAssembly"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#genome-assemblies"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"schemaVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"incremented whenever the core structure of the JSON file introduces breaking changes")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#data-source"},"Data Source entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"samples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the order of these sample names will be used throughout the JSON file when enumerating samples")))),(0,r.kt)("h4",{id:"data-source"},"Data Source"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"version"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"optional description of the data source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"releaseDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")))),(0,r.kt)("h4",{id:"genome-assemblies"},"Genome Assemblies"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"GRCh37"),(0,r.kt)("li",{parentName:"ul"},"GRCh38"),(0,r.kt)("li",{parentName:"ul"},"hg19"),(0,r.kt)("li",{parentName:"ul"},"SARSCoV2")),(0,r.kt)("h2",{id:"positions"},"Positions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"positions":[\n {\n "chromosome":"chr2",\n "position":48010488,\n "repeatUnit":"GGCCCC",\n "refRepeatCount":3,\n "svEnd":48020488,\n "refAllele":"G",\n "altAlleles":[\n "A",\n "GT"\n ],\n "quality":461,\n "filters":[\n "PASS"\n ],\n "ciPos":[\n -170,\n 170\n ],\n "ciEnd":[\n -175,\n 
175\n ],\n "svLength":1000,\n "strandBias":1.23,\n "jointSomaticNormalQuality":29,\n "cytogeneticBand":"2p16.3",\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Variant Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"position"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (1-based notation). 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnit"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refRepeatCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"quality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (Normally an integer, but some variant callers using floating point. 
Has been observed as high as 500k)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"filters"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svLength"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"strandBias"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"small variant"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by GATK (from SB)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"jointSomaticNormalQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by the Manta variant caller (SOMATICSCORE)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cytogeneticBand"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"e.g. 
17p13.1")))),(0,r.kt)("h3",{id:"clingen"},"ClinGen"),(0,r.kt)(o.default,{mdxType:"ClinGen"}),(0,r.kt)(p.default,{mdxType:"ClinGenDosage"}),(0,r.kt)("h3",{id:"1000-genomes-sv"},"1000 Genomes (SV)"),(0,r.kt)(h.default,{mdxType:"ThousandGenomesSV"}),(0,r.kt)("h3",{id:"mitomap-sv"},"MITOMAP (SV)"),(0,r.kt)(k.default,{mdxType:"MitoMapSV"}),(0,r.kt)("h2",{id:"samples"},"Samples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n "totalDepth":57,\n "genotypeQuality":12,\n "copyNumber":3,\n "repeatUnitCounts":[\n 10,\n 20\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "failedFilter":true,\n "splitReadCounts":[\n 10,\n 20\n ],\n "pairedEndReadCounts":[\n 10,\n 20\n ],\n "isDeNovo":true,\n "diseaseAffectedStatuses":[\n "-"\n ],\n "artifactAdjustedQualityScore":89.3,\n "likelihoodRatioQualityScore":78.2,\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"VCF"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotype"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GT"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantFrequencies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"VF, AD"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"totalDepth"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DP"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotypeQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values. Typically maxes out at 99")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"copyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"minorHaplotypeCopyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"MCN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnitCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"REPCN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleDepths"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AD"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer 
values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"FT"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"splitReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pairedEndReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"PR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDeNovo"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DN"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"deNovoQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DQ"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseaseAffectedStatuses"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DST"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"artifactAdjustedQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. 
Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"likelihoodRatioQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"LQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lossOfHeterozygosity"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CN, MCN"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"somaticQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SQ"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"VF"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 100. 2 decimal places. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"binCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"BC"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Empty Samples")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"If a sample does not contain any entries, we will create a sample object that contains the ",(0,r.kt)("inlineCode",{parentName:"p"},"isEmpty")," key. 
This ensures that sample ordering is preserved while indicating that a sample is intentionally empty."),(0,r.kt)("pre",{parentName:"div"},(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "isEmpty":true\n }\n],\n')))),(0,r.kt)("h2",{id:"variants"},"Variants"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "isReferenceMinorAllele":true,\n "isStructuralVariant":true,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "isRecomposedVariant":true,\n "linkedVids":["2:48010488:GTA:ATC"],\n "hgvsg":"NC_000002.11:g.48010488G>A",\n "phylopScore":0.459\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"vid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"Variant Identifiers"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReferenceMinorAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a reference minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isStructuralVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a structural variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inLowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant lies in a low complexity region (gnomAD low complexity regions)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the reference allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the alternate allele.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"uses\xa0",(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"Sequence Ontology sequence 
alterations"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the decomposed variant has been used to create another recomposed variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isRecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is recomposed from two or more decomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"linkedVids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"list of ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"VIDs")," for variants connecting decomposed and recomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsg"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS g. notation")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phyloP conservation score. 
Range: -14.08 to 6.424")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Reference Minor Alleles")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Nirvana supports annotating reference minor alleles. In such a case, ",(0,r.kt)("inlineCode",{parentName:"p"},"refAllele")," will be replaced by the global major allele and ",(0,r.kt)("inlineCode",{parentName:"p"},"altAllele")," will be replaced with the original reference allele."))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Flagging Decomposed & Recomposed Variants")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isDecomposedVariant":true'),"."),(0,r.kt)("p",{parentName:"div"},"Similarly, the 
recomposed variant will be shown as a new VCF position. This recomposed variant will be flagged with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isRecomposedVariant":true'),"."))),(0,r.kt)("h3",{id:"transcripts"},"Transcripts"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"transcripts":[\n {\n "transcript":"ENST00000445503.1",\n "source":"Ensembl",\n "bioType":"nonsense_mediated_decay",\n "codons":"gGg/gAg",\n "aminoAcids":"G/E",\n "cdnaPos":"268",\n "cdsPos":"116",\n "exons":"1/9",\n "introns":"1/8",\n "proteinPos":"39",\n "geneId":"ENSG00000116062",\n "hgnc":"MSH6",\n "consequence":[\n "missense_variant",\n "NMD_transcript_variant"\n ],\n "hgvsc":"ENST00000445503.1:c.116G>A",\n "hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",\n "geneFusion":{\n "exon":6,\n "intron":5,\n "fusions":[\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",\n "exon":3,\n "intron":2\n },\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",\n "exon":2,\n "intron":1\n }\n ]\n },\n "isCanonical":true,\n "polyPhenScore":0.95,\n "polyPhenPrediction":"probably damaging",\n "proteinId":"ENSP00000405294.1",\n "siftScore":0.61,\n "siftPrediction":"tolerated",\n "completeOverlap":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript ID. e.g. 
ENST00000445503.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"source"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"RefSeq / Ensembl")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,r.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"codons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdnaPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdsPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exons affected by the variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"introns"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"introns affected by the 
variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/obob.cgi"},"Sequence Ontology Consequences"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS protein nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneFusion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#gene-fusions"},"Gene Fusions entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isCanonical"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a canonical 
transcript")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#polyphen"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"protein ID. E.g. ENSP00000405294.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#sift"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"completeOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this transcript is completely overlapped by the variant")))),(0,r.kt)("h4",{id:"polyphen"},"PolyPhen"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"probably damaging"),(0,r.kt)("li",{parentName:"ul"},"possibly damaging"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"unknown")),(0,r.kt)("h4",{id:"sift"},"SIFT"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"tolerated"),(0,r.kt)("li",{parentName:"ul"},"deleterious"),(0,r.kt)("li",{parentName:"ul"},"tolerated - low 
confidence"),(0,r.kt)("li",{parentName:"ul"},"deleterious - low confidence")),(0,r.kt)("h4",{id:"amino-acid-conservation"},"Amino Acid Conservation"),(0,r.kt)(l.default,{mdxType:"AminoAcidConservation"}),(0,r.kt)("h4",{id:"gene-fusions"},"Gene Fusions"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"fusions"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#fusion"},"Fusion entry below"))))),(0,r.kt)("h4",{id:"fusion"},"Fusion"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the other breakpoint was 
located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the other breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature describing the two genes and the transcripts that are fused along with")))),(0,r.kt)("h3",{id:"regulatory-regions"},"Regulatory Regions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"regulatoryRegions":[\n {\n "id":"ENSR00001542175",\n "type":"promoter",\n "consequence":[\n "regulatory_region_variant"\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"type"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-types"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-consequences"},"possible values below"))))),(0,r.kt)("h4",{id:"regulatory-types"},"Regulatory 
Types"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CTCF_binding_site"),(0,r.kt)("li",{parentName:"ul"},"enhancer"),(0,r.kt)("li",{parentName:"ul"},"open_chromatin_region"),(0,r.kt)("li",{parentName:"ul"},"promoter"),(0,r.kt)("li",{parentName:"ul"},"promoter_flanking_region"),(0,r.kt)("li",{parentName:"ul"},"TF_binding_site")),(0,r.kt)("h4",{id:"regulatory-consequences"},"Regulatory Consequences"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"regulatory_region_variant"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_ablation"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_amplification"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_truncation")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)(i.default,{mdxType:"ClinVar"}),(0,r.kt)("h3",{id:"1000-genomes"},"1000 Genomes"),(0,r.kt)(y.default,{mdxType:"ThousandGenomesSmall"}),(0,r.kt)("h3",{id:"gnomad"},"gnomAD"),(0,r.kt)(u.default,{mdxType:"GnomadSmall"}),(0,r.kt)("h3",{id:"dbsnp"},"dbSNP"),(0,r.kt)(s.default,{mdxType:"DbSNP"}),(0,r.kt)("h3",{id:"mitomap"},"MITOMAP"),(0,r.kt)(N.default,{mdxType:"MitoMapSmall"}),(0,r.kt)("h3",{id:"primate-ai"},"Primate AI"),(0,r.kt)(m.default,{mdxType:"PrimateAI"}),(0,r.kt)("h3",{id:"revel"},"REVEL"),(0,r.kt)(c.default,{mdxType:"REVEL"}),(0,r.kt)("h3",{id:"splice-ai"},"Splice AI"),(0,r.kt)(g.default,{mdxType:"SpliceAI"}),(0,r.kt)("h3",{id:"topmed"},"TOPMed"),(0,r.kt)(b.default,{mdxType:"TOPMed"}),(0,r.kt)("h2",{id:"genes"},"Genes"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"genes":[\n {\n "name":"MSH6",\n "hgncId":7329,\n "summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. 
The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",\n /* this is where gene-level data sources can be found e.g. OMIM */\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgncId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"summary"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"short description of the gene from ",(0,r.kt)("a",{parentName:"td",href:"https://www.omim.org/"},"OMIM"))))),(0,r.kt)("h3",{id:"omim"},"OMIM"),(0,r.kt)(v.default,{mdxType:"Omim"}),(0,r.kt)("h3",{id:"gnomad-lof-gene-metrics"},"gnomAD LoF Gene Metrics"),(0,r.kt)(f.default,{mdxType:"GnomadGeneLof"}),(0,r.kt)("h3",{id:"clingen-disease-validity"},"ClinGen Disease Validity"),(0,r.kt)(d.default,{mdxType:"ClinGenDiseaseValidity"}))}M.isMDXComponent=!0},65709:(t,e,a)=>{a.d(e,{Z:()=>n});const n=a.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline 
at end of file diff --git a/assets/js/393f3ed0.0cb2b64c.js b/assets/js/393f3ed0.0cb2b64c.js deleted file mode 100644 index 69d55cfc..00000000 --- a/assets/js/393f3ed0.0cb2b64c.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4931],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),m=d(n),h=i,u=m["".concat(s,".").concat(h)]||m[h]||c[h]||r;return n?a.createElement(u,o(o({ref:t},p),{},{components:n})):a.createElement(u,o({ref:t},p))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[m]="string"==typeof e?e:i,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={title:"Mitochondrial Heteroplasmy"},o=void 
0,l={unversionedId:"data-sources/mito-heteroplasmy",id:"version-3.21/data-sources/mito-heteroplasmy",title:"Mitochondrial Heteroplasmy",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/mito-heteroplasmy.md",sourceDirName:"data-sources",slug:"/data-sources/mito-heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mito-heteroplasmy",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/mito-heteroplasmy.md",tags:[],version:"3.21",frontMatter:{title:"Mitochondrial Heteroplasmy"},sidebar:"docs",previous:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad"},next:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mitomap"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"JSON File",id:"json-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Binning VRF Data",id:"binning-vrf-data",children:[],level:4},{value:"Pre-processing the Data",id:"pre-processing-the-data",children:[],level:4},{value:"Algorithm",id:"algorithm",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. 
The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline."),(0,i.kt)("h2",{id:"json-file"},"JSON File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "T:C":{\n "ad":[\n 1,\n 1,\n 1,\n 1,\n 1,\n 1\n ],\n "allele_type":"alt",\n "vrf":[\n 0.002369668246445498,\n 0.0024937655860349127,\n 0.0016129032258064516,\n 0.0025188916876574307,\n 0.0022935779816513763,\n 0.002008032128514056\n ],\n "vrf_stats":{\n "kurtosis":38.889891511122556,\n "max":0.0025188916876574307,\n "mean":5.4052190471990743e-05,\n "min":0.0,\n "nobs":246,\n "skewness":6.346664692283075,\n "stdev":0.0003461416264750575,\n "variance":1.1981402557879823e-07\n }\n }\n}\n\n')),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the JSON file, we're mainly interested in the following keys:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"variant")," (i.e. 
",(0,i.kt)("inlineCode",{parentName:"li"},"T:C"),")"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ad")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"vrf")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"nobs")," (number of observations)")),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Adjusting for null observations")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The ",(0,i.kt)("inlineCode",{parentName:"p"},"nobs")," value indicates how many observations were made. Ideally this would have been represented in the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," arrays, but it's left as an exercise for the reader."))),(0,i.kt)("h4",{id:"binning-vrf-data"},"Binning VRF Data"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," (variant read frequency) array in the JSON object above is paired with with the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," array (allele depths) shown above."),(0,i.kt)("p",null,"The data in the JSON object has a crazy number of significant digits. This means that as the number of samples increase, this array will grow. 
To make this more future-proof, Nirvana bins everything according to 0.1% increments."),(0,i.kt)("p",null,"With the binned data, we end up having 775 distinct ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143."),(0,i.kt)("h4",{id:"pre-processing-the-data"},"Pre-processing the Data"),(0,i.kt)("p",null,"The JSON file is converted into a small TSV file that is ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/blob/main/MitoHeteroplasmy/Resources/MitoHeteroplasmy.tsv.gz"},"embedded in Nirvana"),". Here is an example of the TSV file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS REF ALT VRF_BINS VRF_COUNTS\nchrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\nchrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\n")),(0,i.kt)("h4",{id:"algorithm"},"Algorithm"),(0,i.kt)("p",null,"Nirvana will calculate mitochondrial heteroplasmy data for every sample in the VCF. 
Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Percentiles")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana uses the ",(0,i.kt)("a",{parentName:"p",href:"https://en.wikipedia.org/wiki/Percentile"},"statistical definition of percentile")," (indicating the value below which a given percentage of observations in a group of observations falls). 
Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1)."))),(0,i.kt)("h2",{id:"download-url"},"Download URL"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unavailable")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The original data set is only available internally at Illumina at the moment."))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{14-17}","{14-17}":!0},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"one percentile for each variant frequency (each alternate allele)")))))}m.isMDXComponent=!0}}]); \ No newline at 
end of file diff --git a/assets/js/395936bc.230e27d6.js b/assets/js/395936bc.230e27d6.js deleted file mode 100644 index d9041c8f..00000000 --- a/assets/js/395936bc.230e27d6.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3759],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var c=r.createContext({}),s=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=s(e.components);return r.createElement(c.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},g=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,c=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),d=s(n),g=a,u=d["".concat(c,".").concat(g)]||d[g]||m[g]||o;return n?r.createElement(u,l(l({ref:t},p),{},{components:n})):r.createElement(u,l({ref:t},p))}));function u(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=g;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[d]="string"==typeof e?e:a,l[1]=i;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>o,metadata:()=>i,toc:()=>c});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 
0,i={unversionedId:"data-sources/fusioncatcher-json",id:"version-3.21/data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/fusioncatcher-json.md",tags:[],version:"3.21",frontMatter:{}},c=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],s={toc:c},p="wrapper";function d(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA oesophageal carcinomas"\n ]\n }\n ]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,a.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known germline 
data sources")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data sources")))),(0,a.kt)("h4",{id:"genes"},"genes"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"first"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"second"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are paralogs for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a readthrough event (both are on the same strand and there are no genes between 
them)")))),(0,a.kt)("h4",{id:"gene"},"gene"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/3b274206.10bb3599.js b/assets/js/3b274206.10bb3599.js deleted file mode 100644 index e29c8aab..00000000 --- a/assets/js/3b274206.10bb3599.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9076],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function i(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var s=r.createContext({}),p=function(t){var e=r.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=p(t.components);return 
r.createElement(s.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,o=t.originalType,s=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),c=p(n),u=a,g=c["".concat(s,".").concat(u)]||c[u]||d[u]||o;return n?r.createElement(g,i(i({ref:e},m),{},{components:n})):r.createElement(g,i({ref:e},m))}));function g(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=n.length,i=new Array(o);i[0]=u;var l={};for(var s in e)hasOwnProperty.call(e,s)&&(l[s]=e[s]);l.originalType=t,l[c]="string"==typeof t?t:a,i[1]=l;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/cosmic-json",id:"version-3.18/data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/cosmic-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],p={toc:s},m="wrapper";function c(t){let{components:e,...n}=t;return(0,a.kt)(m,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' "cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary gland (submandibular)",\n "numSamples":1\n },\n {\n "name":"salivary 
gland (parotid)",\n "numSamples":1\n },\n {\n "name":"salivary gland (nasal cavity)",\n "numSamples":1\n },\n {\n "name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 19841262\n ]\n }\n ]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"id"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion ID")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,a.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,a.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int 
array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")))),(0,a.kt)("p",null,(0,a.kt)("strong",{parentName:"p"},"Count")),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"name"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"description")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"})))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/3b79e001.6715cb0c.js b/assets/js/3b79e001.6715cb0c.js deleted file mode 100644 index 4199300b..00000000 --- a/assets/js/3b79e001.6715cb0c.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9373],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var l=r.createContext({}),p=function(e){var t=r.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},s=function(e){var t=p(e.components);return 
r.createElement(l.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=a,f=u["".concat(l,".").concat(d)]||u[d]||m[d]||o;return n?r.createElement(f,i(i({ref:t},s),{},{components:n})):r.createElement(f,i({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=d;var c={};for(var l in t)hasOwnProperty.call(t,l)&&(c[l]=t[l]);c.originalType=e,c[u]="string"==typeof e?e:a,i[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/primate-ai-json",id:"version-3.21/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/primate-ai-json.md",tags:[],version:"3.21",frontMatter:{}},l=[],p={toc:l},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n 
}\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/3f49c75c.0a03fc91.js b/assets/js/3f49c75c.0a03fc91.js deleted file mode 100644 index a87189af..00000000 --- a/assets/js/3f49c75c.0a03fc91.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2520],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>g});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return 
a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),p=c(n),m=r,g=p["".concat(s,".").concat(m)]||p[m]||u[m]||i;return n?a.createElement(g,l(l({ref:t},d),{},{components:n})):a.createElement(g,l({ref:t},d))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,l=new Array(i);l[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[p]="string"==typeof e?e:r,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},l=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.16/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],c={toc:s},d="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted")))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/3ff3c98a.27f68912.js b/assets/js/3ff3c98a.27f68912.js deleted file mode 100644 index 7e4934a6..00000000 --- a/assets/js/3ff3c98a.27f68912.js +++ 
/dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7859],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),c=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=c(e.components);return r.createElement(p.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,p=e.parentName,s=i(e,["components","mdxType","originalType","parentName"]),u=c(n),m=a,f=u["".concat(p,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=m;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[u]="string"==typeof e?e:a,l[1]=i;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,i={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.17/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.17/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/gnomad-lof-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],c={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pLi"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pNull"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pRec"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 
0.5*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"synZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"misZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/40096745.93ca9e0c.js b/assets/js/40096745.93ca9e0c.js deleted file mode 100644 index 22e2b93e..00000000 --- a/assets/js/40096745.93ca9e0c.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1364],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>k});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),d=function(t){var e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=d(t.components);return r.createElement(p.Provider,{value:e},t.children)},m="mdxType",s={inlineCode:"code",wrapper:function(t){var e=t.children;return 
r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,i=t.originalType,p=t.parentName,c=l(t,["components","mdxType","originalType","parentName"]),m=d(n),u=a,k=m["".concat(p,".").concat(u)]||m[u]||s[u]||i;return n?r.createElement(k,o(o({ref:e},c),{},{components:n})):r.createElement(k,o({ref:e},c))}));function k(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[m]="string"==typeof t?t:a,o[1]=l;for(var d=2;d{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const i={title:"Dependencies"},o=void 0,l={unversionedId:"introduction/dependencies",id:"version-3.16/introduction/dependencies",title:"Dependencies",description:"All of the following dependencies have been included in this repository.",source:"@site/versioned_docs/version-3.16/introduction/dependencies.md",sourceDirName:"introduction",slug:"/introduction/dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/dependencies",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/introduction/dependencies.md",tags:[],version:"3.16",frontMatter:{title:"Dependencies"},sidebar:"version-3.16/docs",previous:{title:"Introduction",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/"},next:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/getting-started"}},p=[],d={toc:p},c="wrapper";function m(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},d,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("p",null,"All of the following dependencies have been included in this 
repository."),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Name"),(0,a.kt)("th",{parentName:"tr",align:"center"},"License"),(0,a.kt)("th",{parentName:"tr",align:null},"Usage"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-extensions-for-dotnet-cli"},"Amazon.Lambda")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS extensions for .NET CLI")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-sdk-net/"},"AWSSDK")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS Lambda, S3, SNS support")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://www.newtonsoft.com/json"},"Json.NET")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"JASIX utility")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/ebiggers/libdeflate"},"libdeflate")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/moq/moq4"},"Moq")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"Mocking framework for unit 
tests")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"http://www.ndesk.org/Options"},"NDesk.Options")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT/X11"),(0,a.kt)("td",{parentName:"tr",align:null},"CommandLine library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/xunit/xunit"},"xUnit")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"Unit testing framework")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/Dead2/zlib-ng"},"zlib-ng")),(0,a.kt)("td",{parentName:"tr",align:"center"},"zlib"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/facebook/zstd"},"zstd")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/40b74949.e72ce752.js b/assets/js/40b74949.e72ce752.js deleted file mode 100644 index 1e6b4f78..00000000 --- a/assets/js/40b74949.e72ce752.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4408],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return 
r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),c=p(n),u=r,f=c["".concat(s,".").concat(u)]||c[u]||d[u]||i;return n?a.createElement(f,o(o({ref:t},m),{},{components:n})):a.createElement(f,o({ref:t},m))}));function f(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,o[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>c,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/omim-json",id:"version-3.21/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/omim-json.md",tags:[],version:"3.21",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],p={toc:s},m="wrapper";function 
c(e){let{components:t,...n}=e;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to 
susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry 
below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was 
mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/40c17da5.ad94238e.js b/assets/js/40c17da5.ad94238e.js deleted file mode 100644 index 67831f0c..00000000 --- a/assets/js/40c17da5.ad94238e.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5096],{3905:(t,n,e)=>{e.d(n,{Zo:()=>m,kt:()=>k});var a=e(67294);function l(t,n,e){return n in t?Object.defineProperty(t,n,{value:e,enumerable:!0,configurable:!0,writable:!0}):t[n]=e,t}function r(t,n){var e=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable}))),e.push.apply(e,a)}return e}function o(t){for(var n=1;n=0||(l[e]=t[e]);return l}(t,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,e)&&(l[e]=t[e])}return l}var p=a.createContext({}),u=function(t){var n=a.useContext(p),e=n;return t&&(e="function"==typeof t?t(n):o(o({},n),t)),e},m=function(t){var n=u(t.components);return a.createElement(p.Provider,{value:n},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var n=t.children;return 
a.createElement(a.Fragment,{},n)}},N=a.forwardRef((function(t,n){var e=t.components,l=t.mdxType,r=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(e),N=l,k=d["".concat(p,".").concat(N)]||d[N]||g[N]||r;return e?a.createElement(k,o(o({ref:n},m),{},{components:e})):a.createElement(k,o({ref:n},m))}));function k(t,n){var e=arguments,l=n&&n.mdxType;if("string"==typeof t||l){var r=e.length,o=new Array(r);o[0]=N;var i={};for(var p in n)hasOwnProperty.call(n,p)&&(i[p]=n[p]);i.originalType=t,i[d]="string"==typeof t?t:l,o[1]=i;for(var u=2;u{e.r(n),e.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>i,toc:()=>p});var a=e(87462),l=(e(67294),e(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.16/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],u={toc:p},m="wrapper";function d(t){let{components:n,...e}=t;return(0,l.kt)(m,(0,a.Z)({},u,e,{components:n,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n 
"allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/40d384af.ca4fa976.js b/assets/js/40d384af.ca4fa976.js deleted file mode 100644 index 55e033cb..00000000 --- a/assets/js/40d384af.ca4fa976.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5938],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=r.createContext({}),l=function(e){var t=r.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(s.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var 
n=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(s,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,i(i({ref:t},p),{},{components:n})):r.createElement(f,i({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=m;var c={};for(var s in t)hasOwnProperty.call(t,s)&&(c[s]=t[s]);c.originalType=e,c[u]="string"==typeof e?e:a,i[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.17/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],l={toc:s},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,a.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/417500f8.8fb8d3be.js b/assets/js/417500f8.8fb8d3be.js deleted file mode 100644 index 5eabc06f..00000000 --- a/assets/js/417500f8.8fb8d3be.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3087,6776],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>D});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=s(e.components);return 
a.createElement(p.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=s(n),u=r,D=d["".concat(p,".").concat(u)]||d[u]||m[u]||i;return n?a.createElement(D,o(o({ref:t},c),{},{components:n})):a.createElement(D,o({ref:t},c))}));function D(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in t)hasOwnProperty.call(t,p)&&(l[p]=t[p]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/splice-ai-json",id:"version-3.18/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/splice-ai-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],s={toc:p},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}d.isMDXComponent=!0},3631:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>p,toc:()=>s});var a=n(87462),r=(n(67294),n(3905)),i=n(43247);const o={title:"Splice AI"},l=void 0,p={unversionedId:"data-sources/splice-ai",id:"version-3.18/data-sources/splice-ai",title:"Splice AI",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/splice-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/splice-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/splice-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/splice-ai.mdx",tags:[],version:"3.18",frontMatter:{title:"Splice AI"},sidebar:"docs",previous:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/revel"},next:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/topmed"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Filtering",id:"filtering",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:s},d="wrapper";function 
m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. ",(0,r.kt)("em",{parentName:"p"},"Cell"),", ",(0,r.kt)("strong",{parentName:"p"},"176")," (3) (2019), pp. 535-548 e24"))),(0,r.kt)("h2",{id:"vcf-file"},"VCF File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##fileformat=VCFv4.0\n##assembly=GRCh37/hg19\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n#CHROM POS ID REF ALT QUAL FILTER INFO\n10 92946 . C T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35\n10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1\n10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21\n10 92947 . A C . . 
SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34\n10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34\n10 92947 . A G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32\n')),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the VCF file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AG")," - \u0394 score (acceptor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AL")," - \u0394 score (acceptor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DG")," - \u0394 score (donor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DL")," - \u0394 score (donor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AG")," - \u0394 position (acceptor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AL")," - \u0394 position (acceptor loss) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DG")," - \u0394 position (donor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DL")," - \u0394 position (donor loss) relative to the variant position")),(0,r.kt)("p",null,"The Splice AI team suggests the following interpretation for the 
scores:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Range"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Confidence"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Pathogenicity"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0 \u2264 x < 0.1"),(0,r.kt)("td",{parentName:"tr",align:"left"},"low"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely benign")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0.1 \u2264 x \u2264 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"medium"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely pathogenic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"x > 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"high"),(0,r.kt)("td",{parentName:"tr",align:"left"},"pathogenic")))),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"filtering"},"Filtering"),(0,r.kt)("p",null,"Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value. I.e. observing low splice scores in intergenic regions. Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed."),(0,r.kt)("p",null,"As a result, Nirvana filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. 
For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/5u6ThOblecrh"},"https://basespace.illumina.com/s/5u6ThOblecrh")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/42c73b29.56d08408.js b/assets/js/42c73b29.56d08408.js new file mode 100644 index 00000000..e5f29735 --- /dev/null +++ b/assets/js/42c73b29.56d08408.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2508],{3905:(e,t,r)=>{r.d(t,{Zo:()=>p,kt:()=>f});var n=r(7294);function a(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function o(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function c(e){for(var t=1;t=0||(a[r]=e[r]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(a[r]=e[r])}return a}var i=n.createContext({}),s=function(e){var t=n.useContext(i),r=t;return e&&(r="function"==typeof e?e(t):c(c({},t),e)),r},p=function(e){var t=s(e.components);return n.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var r=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=s(r),d=a,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||o;return 
r?n.createElement(f,c(c({ref:t},p),{},{components:r})):n.createElement(f,c({ref:t},p))}));function f(e,t){var r=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=r.length,c=new Array(o);c[0]=d;var l={};for(var i in t)hasOwnProperty.call(t,i)&&(l[i]=t[i]);l.originalType=e,l[u]="string"==typeof e?e:a,c[1]=l;for(var s=2;s{r.r(t),r.d(t,{contentTitle:()=>c,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>i});var n=r(7462),a=(r(7294),r(3905));const o={},c=void 0,l={unversionedId:"data-sources/revel-json",id:"data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/revel-json.md",tags:[],version:"current",frontMatter:{}},i=[],s={toc:i},p="wrapper";function u(e){let{components:t,...r}=e;return(0,a.kt)(p,(0,n.Z)({},s,r,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"score"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/42c73b29.94bf7024.js b/assets/js/42c73b29.94bf7024.js deleted file mode 100644 index 247384b5..00000000 --- a/assets/js/42c73b29.94bf7024.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2508],{3905:(e,t,r)=>{r.d(t,{Zo:()=>p,kt:()=>f});var n=r(67294);function a(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function o(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function c(e){for(var t=1;t=0||(a[r]=e[r]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(a[r]=e[r])}return a}var i=n.createContext({}),s=function(e){var t=n.useContext(i),r=t;return e&&(r="function"==typeof e?e(t):c(c({},t),e)),r},p=function(e){var t=s(e.components);return n.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var r=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=s(r),d=a,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||o;return r?n.createElement(f,c(c({ref:t},p),{},{components:r})):n.createElement(f,c({ref:t},p))}));function f(e,t){var r=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=r.length,c=new Array(o);c[0]=d;var l={};for(var i in t)hasOwnProperty.call(t,i)&&(l[i]=t[i]);l.originalType=e,l[u]="string"==typeof e?e:a,c[1]=l;for(var s=2;s{r.r(t),r.d(t,{contentTitle:()=>c,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>i});var n=r(87462),a=(r(67294),r(3905));const o={},c=void 0,l={unversionedId:"data-sources/revel-json",id:"data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/revel-json.md",tags:[],version:"current",frontMatter:{}},i=[],s={toc:i},p="wrapper";function u(e){let{components:t,...r}=e;return(0,a.kt)(p,(0,n.Z)({},s,r,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"score"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/431213b1.277bfc60.js b/assets/js/431213b1.277bfc60.js deleted file mode 100644 index 054c379c..00000000 --- a/assets/js/431213b1.277bfc60.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8113],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function s(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),l=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):s(s({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(i,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,s(s({ref:t},p),{},{components:n})):r.createElement(f,s({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,s=new Array(o);s[0]=m;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:a,s[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const o={},s=void 0,c={unversionedId:"data-sources/dbsnp-json",id:"version-3.18/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/dbsnp-json.md",tags:[],version:"3.18",frontMatter:{}},i=[],l={toc:i},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n 
"rs1042821"\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/4397ec05.8e84ff7e.js b/assets/js/4397ec05.8e84ff7e.js deleted file mode 100644 index 9088ba56..00000000 --- a/assets/js/4397ec05.8e84ff7e.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5360],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var s=a.createContext({}),p=function(t){var e=a.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=p(t.components);return a.createElement(s.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var 
n=t.components,r=t.mdxType,o=t.originalType,s=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),c=p(n),u=r,g=c["".concat(s,".").concat(u)]||c[u]||d[u]||o;return n?a.createElement(g,i(i({ref:e},m),{},{components:n})):a.createElement(g,i({ref:e},m))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var o=n.length,i=new Array(o);i[0]=u;var l={};for(var s in e)hasOwnProperty.call(e,s)&&(l[s]=e[s]);l.originalType=t,l[c]="string"==typeof t?t:r,i[1]=l;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/cosmic-gene-fusion-json",id:"data-sources/cosmic-gene-fusion-json",title:"cosmic-gene-fusion-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-gene-fusion-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-gene-fusion-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-gene-fusion-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-gene-fusion-json.md",tags:[],version:"current",frontMatter:{}},s=[],p={toc:s},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},' "cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary gland (submandibular)",\n "numSamples":1\n },\n {\n "name":"salivary gland (parotid)",\n "numSamples":1\n },\n {\n "name":"salivary gland (nasal cavity)",\n "numSamples":1\n },\n {\n "name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 
19841262\n ]\n }\n ]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,r.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Count")),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"description")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/4397ec05.c9481829.js b/assets/js/4397ec05.c9481829.js new file mode 100644 index 00000000..930b94d3 --- /dev/null +++ b/assets/js/4397ec05.c9481829.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5360],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var a=n(7294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var s=a.createContext({}),p=function(t){var e=a.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=p(t.components);return 
a.createElement(s.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,o=t.originalType,s=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),c=p(n),u=r,g=c["".concat(s,".").concat(u)]||c[u]||d[u]||o;return n?a.createElement(g,i(i({ref:e},m),{},{components:n})):a.createElement(g,i({ref:e},m))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var o=n.length,i=new Array(o);i[0]=u;var l={};for(var s in e)hasOwnProperty.call(e,s)&&(l[s]=e[s]);l.originalType=t,l[c]="string"==typeof t?t:r,i[1]=l;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(7462),r=(n(7294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/cosmic-gene-fusion-json",id:"data-sources/cosmic-gene-fusion-json",title:"cosmic-gene-fusion-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-gene-fusion-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-gene-fusion-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-gene-fusion-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-gene-fusion-json.md",tags:[],version:"current",frontMatter:{}},s=[],p={toc:s},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},' "cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary gland (submandibular)",\n "numSamples":1\n },\n 
{\n "name":"salivary gland (parotid)",\n "numSamples":1\n },\n {\n "name":"salivary gland (nasal cavity)",\n "numSamples":1\n },\n {\n "name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 19841262\n ]\n }\n ]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,r.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Count")),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"description")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/43a128d3.7898b58b.js b/assets/js/43a128d3.7898b58b.js deleted file mode 100644 index 12a76c30..00000000 --- a/assets/js/43a128d3.7898b58b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[784,8001],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>g});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=r.createContext({}),c=function(e){var t=r.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},d=function(e){var t=c(e.components);return 
r.createElement(s.Provider,{value:t},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),u=c(n),m=a,g=u["".concat(s,".").concat(m)]||u[m]||p[m]||o;return n?r.createElement(g,i(i({ref:t},d),{},{components:n})):r.createElement(g,i({ref:t},d))}));function g(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:a,i[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/gme-json",id:"version-3.18/data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gme-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],c={toc:s},d="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(d,(0,r.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n 
"failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}u.isMDXComponent=!0},16027:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>c});var r=n(87462),a=(n(67294),n(3905)),o=n(97398);const i={title:"GME Variome"},l=void 0,s={unversionedId:"data-sources/gme",id:"version-3.18/data-sources/gme",title:"GME Variome",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/gme.mdx",sourceDirName:"data-sources",slug:"/data-sources/gme",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gme",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gme.mdx",tags:[],version:"3.18",frontMatter:{title:"GME 
Variome"},sidebar:"docs",previous:{title:"GERP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gerp"},next:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad"}},c=[{value:"Overview",id:"overview",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"GRCh37 liftover",id:"grch37-liftover",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON output",id:"json-output",children:[],level:2}],d={toc:c},u="wrapper";function p(e){let{components:t,...n}=e;return(0,a.kt)(u,(0,r.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("h2",{id:"overview"},"Overview"),(0,a.kt)("p",null,"The ",(0,a.kt)("a",{parentName:"p",href:"http://igm.ucsd.edu/gme/index.php"},"Greater Middle East (GME) Variome")," Project is aimed at generating a coding base reference for the countries found in the Greater Middle East. Nirvana presents variant frequencies for the Greater Middle Eastern population."),(0,a.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,a.kt)("div",{parentName:"div",className:"admonition-heading"},(0,a.kt)("h5",{parentName:"div"},(0,a.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,a.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,a.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,a.kt)("div",{parentName:"div",className:"admonition-content"},(0,a.kt)("p",{parentName:"div"},"Scott, E. M., Halees, A., Itan, Y., Spencer, E. G., He, Y., Azab, M. A., Gabriel, S. B., Belkadi, A., Boisson, B., Abel, L., Clark, A. G., Greater Middle East Variome Consortium, Alkuraya, F. S., Casanova, J. 
L., & Gleeson, J. G. (2016). Characterization of Greater Middle Eastern genetic variation for enhanced disease gene discovery. ",(0,a.kt)("em",{parentName:"p"},"Nature genetics"),", 48(9), 1071\u20131076. ",(0,a.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/ng.3592"},"https://doi.org/10.1038/ng.3592")))),(0,a.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"chrom pos ref alt AA filter FunctionGVS geneFunction Gene GeneID SIFT_pred GERP++ AF GME_GC GME_AC GME_AF NWA NEA AP Israel SD TP CA FunctionGVS_new Priority Polyphen2_HVAR_pred LRT_pred MutationTaster_pred rsid OMIM_MIM OMIM_Disease AA_AC EA_AC rsid_link position_link\n1 69134 A G A VQSRTrancheSNP99.90to100.00 nonsynonymous_SNV exonic OR4F5 79501 T 2.31 96:0:5 10,192 0.04950495049504951 4:0:0 59:0:2 12:0:0 0:0:0 6:0:0 9:0:2 13:0:2 nonsynonymous_SNV MODERATE B N N none - - none none - http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69134-69133\n1 69270 A G A PASS synonymous_SNV exonic OR4F5 79501 . . 93:38:240 518,224 0.6981132075471698 5:5:11 63:30:86 12:5:28 1:0:2 2:2:18 7:3:46 7:2:52 synonymous_SNV LOW . . . 
rs201219564 - - none none http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs201219564 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69270-69269\n1 69428 T G T PASS nonsynonymous_SNV exonic OR4F5 79501 D 0.891 676:44:15 74,1396 0.050340136054421766 43:0:2 313:16:10 88:7:3 6:0:0 44:8:0 102:9:0 102:4:2 nonsynonymous_SNV MODERATE D N N rs140739101 - - 14,3808 313,6535 http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs140739101 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69428-69427\n")),(0,a.kt)("h4",{id:"parsing"},"Parsing"),(0,a.kt)("p",null,"We parse the GME tsv file and extract the following columns:"),(0,a.kt)("ul",null,(0,a.kt)("li",{parentName:"ul"},"chrom"),(0,a.kt)("li",{parentName:"ul"},"pos"),(0,a.kt)("li",{parentName:"ul"},"ref"),(0,a.kt)("li",{parentName:"ul"},"alt"),(0,a.kt)("li",{parentName:"ul"},"filter"),(0,a.kt)("li",{parentName:"ul"},"GME_AC"),(0,a.kt)("li",{parentName:"ul"},"GME_AF")),(0,a.kt)("h2",{id:"grch37-liftover"},"GRCh37 liftover"),(0,a.kt)("p",null,"The data is not available for GRCh38 on GME website. 
We performed a liftover from GRCh37 to GRCh38 using CrossMap."),(0,a.kt)("h2",{id:"download-url"},"Download URL"),(0,a.kt)("p",null,(0,a.kt)("a",{parentName:"p",href:"http://igm.ucsd.edu/gme/download.shtml"},"http://igm.ucsd.edu/gme/download.shtml")),(0,a.kt)("h2",{id:"json-output"},"JSON output"),(0,a.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/440d17b3.227c488c.js b/assets/js/440d17b3.227c488c.js new file mode 100644 index 00000000..efba3ed0 --- /dev/null +++ b/assets/js/440d17b3.227c488c.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4648],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var a=n(7294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},c=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),s=u(n),c=r,g=s["".concat(p,".").concat(c)]||s[c]||d[c]||l;return n?a.createElement(g,o(o({ref:e},m),{},{components:n})):a.createElement(g,o({ref:e},m))}));function g(t,e){var 
n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(7462),r=(n(7294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-sv-json.md",tags:[],version:"current",frontMatter:{}},p=[],u={toc:p},m="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/440d17b3.d294cb39.js b/assets/js/440d17b3.d294cb39.js deleted file mode 100644 index 10b70ed4..00000000 --- a/assets/js/440d17b3.d294cb39.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4648],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},c=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),s=u(n),c=r,g=s["".concat(p,".").concat(c)]||s[c]||d[c]||l;return n?a.createElement(g,o(o({ref:e},m),{},{components:n})):a.createElement(g,o({ref:e},m))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in 
e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-sv-json.md",tags:[],version:"current",frontMatter:{}},p=[],u={toc:p},m="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/45e4bd3d.99c63d5e.js b/assets/js/45e4bd3d.99c63d5e.js deleted file mode 100644 index 545cb4a4..00000000 --- a/assets/js/45e4bd3d.99c63d5e.js +++ /dev/null @@ -1,2 +0,0 @@ -/*! 
For license information please see 45e4bd3d.99c63d5e.js.LICENSE.txt */ -(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1722],{17331:e=>{function t(){this._events=this._events||{},this._maxListeners=this._maxListeners||void 0}function r(e){return"function"==typeof e}function n(e){return"object"==typeof e&&null!==e}function i(e){return void 0===e}e.exports=t,t.prototype._events=void 0,t.prototype._maxListeners=void 0,t.defaultMaxListeners=10,t.prototype.setMaxListeners=function(e){if("number"!=typeof e||e<0||isNaN(e))throw TypeError("n must be a positive number");return this._maxListeners=e,this},t.prototype.emit=function(e){var t,a,s,c,u,o;if(this._events||(this._events={}),"error"===e&&(!this._events.error||n(this._events.error)&&!this._events.error.length)){if((t=arguments[1])instanceof Error)throw t;var h=new Error('Uncaught, unspecified "error" event. ('+t+")");throw h.context=t,h}if(i(a=this._events[e]))return!1;if(r(a))switch(arguments.length){case 1:a.call(this);break;case 2:a.call(this,arguments[1]);break;case 3:a.call(this,arguments[1],arguments[2]);break;default:c=Array.prototype.slice.call(arguments,1),a.apply(this,c)}else if(n(a))for(c=Array.prototype.slice.call(arguments,1),s=(o=a.slice()).length,u=0;u0&&this._events[e].length>s&&(this._events[e].warned=!0,console.error("(node) warning: possible EventEmitter memory leak detected. %d listeners added. 
Use emitter.setMaxListeners() to increase limit.",this._events[e].length),"function"==typeof console.trace&&console.trace()),this},t.prototype.on=t.prototype.addListener,t.prototype.once=function(e,t){if(!r(t))throw TypeError("listener must be a function");var n=!1;function i(){this.removeListener(e,i),n||(n=!0,t.apply(this,arguments))}return i.listener=t,this.on(e,i),this},t.prototype.removeListener=function(e,t){var i,a,s,c;if(!r(t))throw TypeError("listener must be a function");if(!this._events||!this._events[e])return this;if(s=(i=this._events[e]).length,a=-1,i===t||r(i.listener)&&i.listener===t)delete this._events[e],this._events.removeListener&&this.emit("removeListener",e,t);else if(n(i)){for(c=s;c-- >0;)if(i[c]===t||i[c].listener&&i[c].listener===t){a=c;break}if(a<0)return this;1===i.length?(i.length=0,delete this._events[e]):i.splice(a,1),this._events.removeListener&&this.emit("removeListener",e,t)}return this},t.prototype.removeAllListeners=function(e){var t,n;if(!this._events)return this;if(!this._events.removeListener)return 0===arguments.length?this._events={}:this._events[e]&&delete this._events[e],this;if(0===arguments.length){for(t in this._events)"removeListener"!==t&&this.removeAllListeners(t);return this.removeAllListeners("removeListener"),this._events={},this}if(r(n=this._events[e]))this.removeListener(e,n);else if(n)for(;n.length;)this.removeListener(e,n[n.length-1]);return delete this._events[e],this},t.prototype.listeners=function(e){return this._events&&this._events[e]?r(this._events[e])?[this._events[e]]:this._events[e].slice():[]},t.prototype.listenerCount=function(e){if(this._events){var t=this._events[e];if(r(t))return 1;if(t)return t.length}return 0},t.listenerCount=function(e,t){return e.listenerCount(t)}},8131:(e,t,r)=>{"use strict";var n=r(49374),i=r(17775),a=r(23076);function s(e,t,r){return new n(e,t,r)}s.version=r(24336),s.AlgoliaSearchHelper=n,s.SearchParameters=i,s.SearchResults=a,e.exports=s},68078:(e,t,r)=>{"use strict";var 
n=r(17331);function i(e,t){this.main=e,this.fn=t,this.lastResults=null}r(14853)(i,n),i.prototype.detach=function(){this.removeAllListeners(),this.main.detachDerivedHelper(this)},i.prototype.getModifiedState=function(e){return this.fn(e)},e.exports=i},82437:(e,t,r)=>{"use strict";var n=r(52344),i=r(90116),a=r(49803),s={addRefinement:function(e,t,r){if(s.isRefined(e,t,r))return e;var i=""+r,a=e[t]?e[t].concat(i):[i],c={};return c[t]=a,n({},c,e)},removeRefinement:function(e,t,r){if(void 0===r)return s.clearRefinement(e,(function(e,r){return t===r}));var n=""+r;return s.clearRefinement(e,(function(e,r){return t===r&&n===e}))},toggleRefinement:function(e,t,r){if(void 0===r)throw new Error("toggleRefinement should be used with a value");return s.isRefined(e,t,r)?s.removeRefinement(e,t,r):s.addRefinement(e,t,r)},clearRefinement:function(e,t,r){if(void 0===t)return i(e)?{}:e;if("string"==typeof t)return a(e,[t]);if("function"==typeof t){var n=!1,s=Object.keys(e).reduce((function(i,a){var s=e[a]||[],c=s.filter((function(e){return!t(e,a,r)}));return c.length!==s.length&&(n=!0),i[a]=c,i}),{});return n?s:e}},isRefined:function(e,t,r){var n=Boolean(e[t])&&e[t].length>0;if(void 0===r||!n)return n;var i=""+r;return-1!==e[t].indexOf(i)}};e.exports=s},17775:(e,t,r)=>{"use strict";var n=r(52344),i=r(7888),a=r(22686),s=r(60185),c=r(90116),u=r(49803),o=r(28023),h=r(46801),f=r(82437);function l(e,t){return Array.isArray(e)&&Array.isArray(t)?e.length===t.length&&e.every((function(e,r){return l(t[r],e)})):e===t}function m(e){var t=e?m._parseNumbers(e):{};void 0===t.userToken||h(t.userToken)||console.warn("[algoliasearch-helper] The `userToken` parameter is invalid. 
This can lead to wrong analytics.\n - Format: [a-zA-Z0-9_-]{1,64}"),this.facets=t.facets||[],this.disjunctiveFacets=t.disjunctiveFacets||[],this.hierarchicalFacets=t.hierarchicalFacets||[],this.facetsRefinements=t.facetsRefinements||{},this.facetsExcludes=t.facetsExcludes||{},this.disjunctiveFacetsRefinements=t.disjunctiveFacetsRefinements||{},this.numericRefinements=t.numericRefinements||{},this.tagRefinements=t.tagRefinements||[],this.hierarchicalFacetsRefinements=t.hierarchicalFacetsRefinements||{};var r=this;Object.keys(t).forEach((function(e){var n=-1!==m.PARAMETERS.indexOf(e),i=void 0!==t[e];!n&&i&&(r[e]=t[e])}))}m.PARAMETERS=Object.keys(new m),m._parseNumbers=function(e){if(e instanceof m)return e;var t={};if(["aroundPrecision","aroundRadius","getRankingInfo","minWordSizefor2Typos","minWordSizefor1Typo","page","maxValuesPerFacet","distinct","minimumAroundRadius","hitsPerPage","minProximity"].forEach((function(r){var n=e[r];if("string"==typeof n){var i=parseFloat(n);t[r]=isNaN(i)?n:i}})),Array.isArray(e.insideBoundingBox)&&(t.insideBoundingBox=e.insideBoundingBox.map((function(e){return Array.isArray(e)?e.map((function(e){return parseFloat(e)})):e}))),e.numericRefinements){var r={};Object.keys(e.numericRefinements).forEach((function(t){var n=e.numericRefinements[t]||{};r[t]={},Object.keys(n).forEach((function(e){var i=n[e].map((function(e){return Array.isArray(e)?e.map((function(e){return"string"==typeof e?parseFloat(e):e})):"string"==typeof e?parseFloat(e):e}));r[t][e]=i}))})),t.numericRefinements=r}return s({},e,t)},m.make=function(e){var t=new m(e);return(e.hierarchicalFacets||[]).forEach((function(e){if(e.rootPath){var r=t.getHierarchicalRefinement(e.name);r.length>0&&0!==r[0].indexOf(e.rootPath)&&(t=t.clearRefinements(e.name)),0===(r=t.getHierarchicalRefinement(e.name)).length&&(t=t.toggleHierarchicalFacetRefinement(e.name,e.rootPath))}})),t},m.validate=function(e,t){var r=t||{};return e.tagFilters&&r.tagRefinements&&r.tagRefinements.length>0?new 
Error("[Tags] Cannot switch from the managed tag API to the advanced API. It is probably an error, if it is really what you want, you should first clear the tags with clearTags method."):e.tagRefinements.length>0&&r.tagFilters?new Error("[Tags] Cannot switch from the advanced tag API to the managed API. It is probably an error, if it is not, you should first clear the tags with clearTags method."):e.numericFilters&&r.numericRefinements&&c(r.numericRefinements)?new Error("[Numeric filters] Can't switch from the advanced to the managed API. It is probably an error, if this is really what you want, you have to first clear the numeric filters."):c(e.numericRefinements)&&r.numericFilters?new Error("[Numeric filters] Can't switch from the managed API to the advanced. It is probably an error, if this is really what you want, you have to first clear the numeric filters."):null},m.prototype={constructor:m,clearRefinements:function(e){var t={numericRefinements:this._clearNumericRefinements(e),facetsRefinements:f.clearRefinement(this.facetsRefinements,e,"conjunctiveFacet"),facetsExcludes:f.clearRefinement(this.facetsExcludes,e,"exclude"),disjunctiveFacetsRefinements:f.clearRefinement(this.disjunctiveFacetsRefinements,e,"disjunctiveFacet"),hierarchicalFacetsRefinements:f.clearRefinement(this.hierarchicalFacetsRefinements,e,"hierarchicalFacet")};return t.numericRefinements===this.numericRefinements&&t.facetsRefinements===this.facetsRefinements&&t.facetsExcludes===this.facetsExcludes&&t.disjunctiveFacetsRefinements===this.disjunctiveFacetsRefinements&&t.hierarchicalFacetsRefinements===this.hierarchicalFacetsRefinements?this:this.setQueryParameters(t)},clearTags:function(){return void 0===this.tagFilters&&0===this.tagRefinements.length?this:this.setQueryParameters({tagFilters:void 0,tagRefinements:[]})},setIndex:function(e){return e===this.index?this:this.setQueryParameters({index:e})},setQuery:function(e){return 
e===this.query?this:this.setQueryParameters({query:e})},setPage:function(e){return e===this.page?this:this.setQueryParameters({page:e})},setFacets:function(e){return this.setQueryParameters({facets:e})},setDisjunctiveFacets:function(e){return this.setQueryParameters({disjunctiveFacets:e})},setHitsPerPage:function(e){return this.hitsPerPage===e?this:this.setQueryParameters({hitsPerPage:e})},setTypoTolerance:function(e){return this.typoTolerance===e?this:this.setQueryParameters({typoTolerance:e})},addNumericRefinement:function(e,t,r){var n=o(r);if(this.isNumericRefined(e,t,n))return this;var i=s({},this.numericRefinements);return i[e]=s({},i[e]),i[e][t]?(i[e][t]=i[e][t].slice(),i[e][t].push(n)):i[e][t]=[n],this.setQueryParameters({numericRefinements:i})},getConjunctiveRefinements:function(e){return this.isConjunctiveFacet(e)&&this.facetsRefinements[e]||[]},getDisjunctiveRefinements:function(e){return this.isDisjunctiveFacet(e)&&this.disjunctiveFacetsRefinements[e]||[]},getHierarchicalRefinement:function(e){return this.hierarchicalFacetsRefinements[e]||[]},getExcludeRefinements:function(e){return this.isConjunctiveFacet(e)&&this.facetsExcludes[e]||[]},removeNumericRefinement:function(e,t,r){var n=r;return void 0!==n?this.isNumericRefined(e,t,n)?this.setQueryParameters({numericRefinements:this._clearNumericRefinements((function(r,i){return i===e&&r.op===t&&l(r.val,o(n))}))}):this:void 0!==t?this.isNumericRefined(e,t)?this.setQueryParameters({numericRefinements:this._clearNumericRefinements((function(r,n){return n===e&&r.op===t}))}):this:this.isNumericRefined(e)?this.setQueryParameters({numericRefinements:this._clearNumericRefinements((function(t,r){return r===e}))}):this},getNumericRefinements:function(e){return this.numericRefinements[e]||{}},getNumericRefinement:function(e,t){return this.numericRefinements[e]&&this.numericRefinements[e][t]},_clearNumericRefinements:function(e){if(void 0===e)return 
c(this.numericRefinements)?{}:this.numericRefinements;if("string"==typeof e)return u(this.numericRefinements,[e]);if("function"==typeof e){var t=!1,r=this.numericRefinements,n=Object.keys(r).reduce((function(n,i){var a=r[i],s={};return a=a||{},Object.keys(a).forEach((function(r){var n=a[r]||[],c=[];n.forEach((function(t){e({val:t,op:r},i,"numeric")||c.push(t)})),c.length!==n.length&&(t=!0),s[r]=c})),n[i]=s,n}),{});return t?n:this.numericRefinements}},addFacet:function(e){return this.isConjunctiveFacet(e)?this:this.setQueryParameters({facets:this.facets.concat([e])})},addDisjunctiveFacet:function(e){return this.isDisjunctiveFacet(e)?this:this.setQueryParameters({disjunctiveFacets:this.disjunctiveFacets.concat([e])})},addHierarchicalFacet:function(e){if(this.isHierarchicalFacet(e.name))throw new Error("Cannot declare two hierarchical facets with the same name: `"+e.name+"`");return this.setQueryParameters({hierarchicalFacets:this.hierarchicalFacets.concat([e])})},addFacetRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return f.isRefined(this.facetsRefinements,e,t)?this:this.setQueryParameters({facetsRefinements:f.addRefinement(this.facetsRefinements,e,t)})},addExcludeRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return f.isRefined(this.facetsExcludes,e,t)?this:this.setQueryParameters({facetsExcludes:f.addRefinement(this.facetsExcludes,e,t)})},addDisjunctiveFacetRefinement:function(e,t){if(!this.isDisjunctiveFacet(e))throw new Error(e+" is not defined in the disjunctiveFacets attribute of the helper configuration");return f.isRefined(this.disjunctiveFacetsRefinements,e,t)?this:this.setQueryParameters({disjunctiveFacetsRefinements:f.addRefinement(this.disjunctiveFacetsRefinements,e,t)})},addTagRefinement:function(e){if(this.isTagRefined(e))return this;var 
t={tagRefinements:this.tagRefinements.concat(e)};return this.setQueryParameters(t)},removeFacet:function(e){return this.isConjunctiveFacet(e)?this.clearRefinements(e).setQueryParameters({facets:this.facets.filter((function(t){return t!==e}))}):this},removeDisjunctiveFacet:function(e){return this.isDisjunctiveFacet(e)?this.clearRefinements(e).setQueryParameters({disjunctiveFacets:this.disjunctiveFacets.filter((function(t){return t!==e}))}):this},removeHierarchicalFacet:function(e){return this.isHierarchicalFacet(e)?this.clearRefinements(e).setQueryParameters({hierarchicalFacets:this.hierarchicalFacets.filter((function(t){return t.name!==e}))}):this},removeFacetRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return f.isRefined(this.facetsRefinements,e,t)?this.setQueryParameters({facetsRefinements:f.removeRefinement(this.facetsRefinements,e,t)}):this},removeExcludeRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return f.isRefined(this.facetsExcludes,e,t)?this.setQueryParameters({facetsExcludes:f.removeRefinement(this.facetsExcludes,e,t)}):this},removeDisjunctiveFacetRefinement:function(e,t){if(!this.isDisjunctiveFacet(e))throw new Error(e+" is not defined in the disjunctiveFacets attribute of the helper configuration");return f.isRefined(this.disjunctiveFacetsRefinements,e,t)?this.setQueryParameters({disjunctiveFacetsRefinements:f.removeRefinement(this.disjunctiveFacetsRefinements,e,t)}):this},removeTagRefinement:function(e){if(!this.isTagRefined(e))return this;var t={tagRefinements:this.tagRefinements.filter((function(t){return t!==e}))};return this.setQueryParameters(t)},toggleRefinement:function(e,t){return this.toggleFacetRefinement(e,t)},toggleFacetRefinement:function(e,t){if(this.isHierarchicalFacet(e))return 
this.toggleHierarchicalFacetRefinement(e,t);if(this.isConjunctiveFacet(e))return this.toggleConjunctiveFacetRefinement(e,t);if(this.isDisjunctiveFacet(e))return this.toggleDisjunctiveFacetRefinement(e,t);throw new Error("Cannot refine the undeclared facet "+e+"; it should be added to the helper options facets, disjunctiveFacets or hierarchicalFacets")},toggleConjunctiveFacetRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return this.setQueryParameters({facetsRefinements:f.toggleRefinement(this.facetsRefinements,e,t)})},toggleExcludeFacetRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return this.setQueryParameters({facetsExcludes:f.toggleRefinement(this.facetsExcludes,e,t)})},toggleDisjunctiveFacetRefinement:function(e,t){if(!this.isDisjunctiveFacet(e))throw new Error(e+" is not defined in the disjunctiveFacets attribute of the helper configuration");return this.setQueryParameters({disjunctiveFacetsRefinements:f.toggleRefinement(this.disjunctiveFacetsRefinements,e,t)})},toggleHierarchicalFacetRefinement:function(e,t){if(!this.isHierarchicalFacet(e))throw new Error(e+" is not defined in the hierarchicalFacets attribute of the helper configuration");var r=this._getHierarchicalFacetSeparator(this.getHierarchicalFacetByName(e)),i={};return void 0!==this.hierarchicalFacetsRefinements[e]&&this.hierarchicalFacetsRefinements[e].length>0&&(this.hierarchicalFacetsRefinements[e][0]===t||0===this.hierarchicalFacetsRefinements[e][0].indexOf(t+r))?-1===t.indexOf(r)?i[e]=[]:i[e]=[t.slice(0,t.lastIndexOf(r))]:i[e]=[t],this.setQueryParameters({hierarchicalFacetsRefinements:n({},i,this.hierarchicalFacetsRefinements)})},addHierarchicalFacetRefinement:function(e,t){if(this.isHierarchicalFacetRefined(e))throw new Error(e+" is already refined.");if(!this.isHierarchicalFacet(e))throw new Error(e+" 
is not defined in the hierarchicalFacets attribute of the helper configuration.");var r={};return r[e]=[t],this.setQueryParameters({hierarchicalFacetsRefinements:n({},r,this.hierarchicalFacetsRefinements)})},removeHierarchicalFacetRefinement:function(e){if(!this.isHierarchicalFacetRefined(e))return this;var t={};return t[e]=[],this.setQueryParameters({hierarchicalFacetsRefinements:n({},t,this.hierarchicalFacetsRefinements)})},toggleTagRefinement:function(e){return this.isTagRefined(e)?this.removeTagRefinement(e):this.addTagRefinement(e)},isDisjunctiveFacet:function(e){return this.disjunctiveFacets.indexOf(e)>-1},isHierarchicalFacet:function(e){return void 0!==this.getHierarchicalFacetByName(e)},isConjunctiveFacet:function(e){return this.facets.indexOf(e)>-1},isFacetRefined:function(e,t){return!!this.isConjunctiveFacet(e)&&f.isRefined(this.facetsRefinements,e,t)},isExcludeRefined:function(e,t){return!!this.isConjunctiveFacet(e)&&f.isRefined(this.facetsExcludes,e,t)},isDisjunctiveFacetRefined:function(e,t){return!!this.isDisjunctiveFacet(e)&&f.isRefined(this.disjunctiveFacetsRefinements,e,t)},isHierarchicalFacetRefined:function(e,t){if(!this.isHierarchicalFacet(e))return!1;var r=this.getHierarchicalRefinement(e);return t?-1!==r.indexOf(t):r.length>0},isNumericRefined:function(e,t,r){if(void 0===r&&void 0===t)return Boolean(this.numericRefinements[e]);var n=this.numericRefinements[e]&&void 0!==this.numericRefinements[e][t];if(void 0===r||!n)return n;var a,s,c=o(r),u=void 0!==(a=this.numericRefinements[e][t],s=c,i(a,(function(e){return l(e,s)})));return n&&u},isTagRefined:function(e){return-1!==this.tagRefinements.indexOf(e)},getRefinedDisjunctiveFacets:function(){var e=this,t=a(Object.keys(this.numericRefinements).filter((function(t){return Object.keys(e.numericRefinements[t]).length>0})),this.disjunctiveFacets);return Object.keys(this.disjunctiveFacetsRefinements).filter((function(t){return 
e.disjunctiveFacetsRefinements[t].length>0})).concat(t).concat(this.getRefinedHierarchicalFacets()).sort()},getRefinedHierarchicalFacets:function(){var e=this;return a(this.hierarchicalFacets.map((function(e){return e.name})),Object.keys(this.hierarchicalFacetsRefinements).filter((function(t){return e.hierarchicalFacetsRefinements[t].length>0}))).sort()},getUnrefinedDisjunctiveFacets:function(){var e=this.getRefinedDisjunctiveFacets();return this.disjunctiveFacets.filter((function(t){return-1===e.indexOf(t)}))},managedParameters:["index","facets","disjunctiveFacets","facetsRefinements","hierarchicalFacets","facetsExcludes","disjunctiveFacetsRefinements","numericRefinements","tagRefinements","hierarchicalFacetsRefinements"],getQueryParams:function(){var e=this.managedParameters,t={},r=this;return Object.keys(this).forEach((function(n){var i=r[n];-1===e.indexOf(n)&&void 0!==i&&(t[n]=i)})),t},setQueryParameter:function(e,t){if(this[e]===t)return this;var r={};return r[e]=t,this.setQueryParameters(r)},setQueryParameters:function(e){if(!e)return this;var t=m.validate(this,e);if(t)throw t;var r=this,n=m._parseNumbers(e),i=Object.keys(this).reduce((function(e,t){return e[t]=r[t],e}),{}),a=Object.keys(n).reduce((function(e,t){var r=void 0!==e[t],i=void 0!==n[t];return r&&!i?u(e,[t]):(i&&(e[t]=n[t]),e)}),i);return new this.constructor(a)},resetPage:function(){return void 0===this.page?this:this.setPage(0)},_getHierarchicalFacetSortBy:function(e){return e.sortBy||["isRefined:desc","name:asc"]},_getHierarchicalFacetSeparator:function(e){return e.separator||" > "},_getHierarchicalRootPath:function(e){return e.rootPath||null},_getHierarchicalShowParentLevel:function(e){return"boolean"!=typeof e.showParentLevel||e.showParentLevel},getHierarchicalFacetByName:function(e){return i(this.hierarchicalFacets,(function(t){return t.name===e}))},getHierarchicalFacetBreadcrumb:function(e){if(!this.isHierarchicalFacet(e))return[];var t=this.getHierarchicalRefinement(e)[0];if(!t)return[];var 
r=this._getHierarchicalFacetSeparator(this.getHierarchicalFacetByName(e));return t.split(r).map((function(e){return e.trim()}))},toString:function(){return JSON.stringify(this,null,2)}},e.exports=m},10210:(e,t,r)=>{"use strict";e.exports=function(e){return function(t,r){var n=e.hierarchicalFacets[r],o=e.hierarchicalFacetsRefinements[n.name]&&e.hierarchicalFacetsRefinements[n.name][0]||"",h=e._getHierarchicalFacetSeparator(n),f=e._getHierarchicalRootPath(n),l=e._getHierarchicalShowParentLevel(n),m=a(e._getHierarchicalFacetSortBy(n)),d=t.every((function(e){return e.exhaustive})),p=function(e,t,r,n,a){return function(o,h,f){var l=o;if(f>0){var m=0;for(l=o;m{"use strict";var n=r(74587),i=r(52344),a=r(94039),s=r(7888),c=r(69725),u=r(82293),o=r(60185),h=r(42148),f=a.escapeFacetValue,l=a.unescapeFacetValue,m=r(10210);function d(e){var t={};return e.forEach((function(e,r){t[e]=r})),t}function p(e,t,r){t&&t[r]&&(e.stats=t[r])}function v(e,t,r){var a=t[0];this._rawResults=t;var u=this;Object.keys(a).forEach((function(e){u[e]=a[e]})),Object.keys(r||{}).forEach((function(e){u[e]=r[e]})),this.processingTimeMS=t.reduce((function(e,t){return void 0===t.processingTimeMS?e:e+t.processingTimeMS}),0),this.disjunctiveFacets=[],this.hierarchicalFacets=e.hierarchicalFacets.map((function(){return[]})),this.facets=[];var h=e.getRefinedDisjunctiveFacets(),f=d(e.facets),v=d(e.disjunctiveFacets),g=1,y=a.facets||{};Object.keys(y).forEach((function(t){var r,n,i=y[t],o=(r=e.hierarchicalFacets,n=t,s(r,(function(e){return(e.attributes||[]).indexOf(n)>-1})));if(o){var h=o.attributes.indexOf(t),l=c(e.hierarchicalFacets,(function(e){return e.name===o.name}));u.hierarchicalFacets[l][h]={attribute:t,data:i,exhaustive:a.exhaustiveFacetsCount}}else{var 
m,d=-1!==e.disjunctiveFacets.indexOf(t),g=-1!==e.facets.indexOf(t);d&&(m=v[t],u.disjunctiveFacets[m]={name:t,data:i,exhaustive:a.exhaustiveFacetsCount},p(u.disjunctiveFacets[m],a.facets_stats,t)),g&&(m=f[t],u.facets[m]={name:t,data:i,exhaustive:a.exhaustiveFacetsCount},p(u.facets[m],a.facets_stats,t))}})),this.hierarchicalFacets=n(this.hierarchicalFacets),h.forEach((function(r){var n=t[g],s=n&&n.facets?n.facets:{},h=e.getHierarchicalFacetByName(r);Object.keys(s).forEach((function(t){var r,f=s[t];if(h){r=c(e.hierarchicalFacets,(function(e){return e.name===h.name}));var m=c(u.hierarchicalFacets[r],(function(e){return e.attribute===t}));if(-1===m)return;u.hierarchicalFacets[r][m].data=o({},u.hierarchicalFacets[r][m].data,f)}else{r=v[t];var d=a.facets&&a.facets[t]||{};u.disjunctiveFacets[r]={name:t,data:i({},f,d),exhaustive:n.exhaustiveFacetsCount},p(u.disjunctiveFacets[r],n.facets_stats,t),e.disjunctiveFacetsRefinements[t]&&e.disjunctiveFacetsRefinements[t].forEach((function(n){!u.disjunctiveFacets[r].data[n]&&e.disjunctiveFacetsRefinements[t].indexOf(l(n))>-1&&(u.disjunctiveFacets[r].data[n]=0)}))}})),g++})),e.getRefinedHierarchicalFacets().forEach((function(r){var n=e.getHierarchicalFacetByName(r),a=e._getHierarchicalFacetSeparator(n),s=e.getHierarchicalRefinement(r);0===s.length||s[0].split(a).length<2||t.slice(g).forEach((function(t){var r=t&&t.facets?t.facets:{};Object.keys(r).forEach((function(t){var o=r[t],h=c(e.hierarchicalFacets,(function(e){return e.name===n.name})),f=c(u.hierarchicalFacets[h],(function(e){return e.attribute===t}));if(-1!==f){var l={};if(s.length>0){var m=s[0].split(a)[0];l[m]=u.hierarchicalFacets[h][f].data[m]}u.hierarchicalFacets[h][f].data=i(l,o,u.hierarchicalFacets[h][f].data)}})),g++}))})),Object.keys(e.facetsExcludes).forEach((function(t){var 
r=e.facetsExcludes[t],n=f[t];u.facets[n]={name:t,data:y[t],exhaustive:a.exhaustiveFacetsCount},r.forEach((function(e){u.facets[n]=u.facets[n]||{name:t},u.facets[n].data=u.facets[n].data||{},u.facets[n].data[e]=0}))})),this.hierarchicalFacets=this.hierarchicalFacets.map(m(e)),this.facets=n(this.facets),this.disjunctiveFacets=n(this.disjunctiveFacets),this._state=e}function g(e,t){function r(e){return e.name===t}if(e._state.isConjunctiveFacet(t)){var n=s(e.facets,r);return n?Object.keys(n.data).map((function(r){var i=f(r);return{name:r,escapedValue:i,count:n.data[r],isRefined:e._state.isFacetRefined(t,i),isExcluded:e._state.isExcludeRefined(t,r)}})):[]}if(e._state.isDisjunctiveFacet(t)){var i=s(e.disjunctiveFacets,r);return i?Object.keys(i.data).map((function(r){var n=f(r);return{name:r,escapedValue:n,count:i.data[r],isRefined:e._state.isDisjunctiveFacetRefined(t,n)}})):[]}if(e._state.isHierarchicalFacet(t)){var a=s(e.hierarchicalFacets,r);if(!a)return a;var c=e._state.getHierarchicalFacetByName(t),u=e._state._getHierarchicalFacetSeparator(c),o=l(e._state.getHierarchicalRefinement(t)[0]||"");0===o.indexOf(c.rootPath)&&(o=o.replace(c.rootPath+u,""));var h=o.split(u);return h.unshift(t),y(a,h,0),a}}function y(e,t,r){e.isRefined=e.name===t[r],e.data&&e.data.forEach((function(e){y(e,t,r+1)}))}function R(e,t,r,n){if(n=n||0,Array.isArray(t))return e(t,r[n]);if(!t.data||0===t.data.length)return t;var a=t.data.map((function(t){return R(e,t,r,n+1)})),s=e(a,r[n]);return i({data:s},t)}function F(e,t){var r=s(e,(function(e){return e.name===t}));return r&&r.stats}function b(e,t,r,n,i){var a=s(i,(function(e){return e.name===r})),c=a&&a.data&&a.data[n]?a.data[n]:0,u=a&&a.exhaustive||!1;return{type:t,attributeName:r,name:n,count:c,exhaustive:u}}v.prototype.getFacetByName=function(e){function t(t){return t.name===e}return 
s(this.facets,t)||s(this.disjunctiveFacets,t)||s(this.hierarchicalFacets,t)},v.DEFAULT_SORT=["isRefined:desc","count:desc","name:asc"],v.prototype.getFacetValues=function(e,t){var r=g(this,e);if(r){var n,a=i({},t,{sortBy:v.DEFAULT_SORT,facetOrdering:!(t&&t.sortBy)}),s=this;if(Array.isArray(r))n=[e];else n=s._state.getHierarchicalFacetByName(r.name).attributes;return R((function(e,t){if(a.facetOrdering){var r=function(e,t){return e.renderingContent&&e.renderingContent.facetOrdering&&e.renderingContent.facetOrdering.values&&e.renderingContent.facetOrdering.values[t]}(s,t);if(r)return function(e,t){var r=[],n=[],i=(t.order||[]).reduce((function(e,t,r){return e[t]=r,e}),{});e.forEach((function(e){var t=e.path||e.name;void 0!==i[t]?r[i[t]]=e:n.push(e)})),r=r.filter((function(e){return e}));var a,s=t.sortRemainingBy;return"hidden"===s?r:(a="alpha"===s?[["path","name"],["asc","asc"]]:[["count"],["desc"]],r.concat(h(n,a[0],a[1])))}(e,r)}if(Array.isArray(a.sortBy)){var n=u(a.sortBy,v.DEFAULT_SORT);return h(e,n[0],n[1])}if("function"==typeof a.sortBy)return function(e,t){return t.sort(e)}(a.sortBy,e);throw new Error("options.sortBy is optional but if defined it must be either an array of string (predicates) or a sorting function")}),r,n)}},v.prototype.getFacetStats=function(e){return this._state.isConjunctiveFacet(e)?F(this.facets,e):this._state.isDisjunctiveFacet(e)?F(this.disjunctiveFacets,e):void 0},v.prototype.getRefinements=function(){var e=this._state,t=this,r=[];return 
Object.keys(e.facetsRefinements).forEach((function(n){e.facetsRefinements[n].forEach((function(i){r.push(b(e,"facet",n,i,t.facets))}))})),Object.keys(e.facetsExcludes).forEach((function(n){e.facetsExcludes[n].forEach((function(i){r.push(b(e,"exclude",n,i,t.facets))}))})),Object.keys(e.disjunctiveFacetsRefinements).forEach((function(n){e.disjunctiveFacetsRefinements[n].forEach((function(i){r.push(b(e,"disjunctive",n,i,t.disjunctiveFacets))}))})),Object.keys(e.hierarchicalFacetsRefinements).forEach((function(n){e.hierarchicalFacetsRefinements[n].forEach((function(i){r.push(function(e,t,r,n){var i=e.getHierarchicalFacetByName(t),a=e._getHierarchicalFacetSeparator(i),c=r.split(a),u=s(n,(function(e){return e.name===t})),o=c.reduce((function(e,t){var r=e&&s(e.data,(function(e){return e.name===t}));return void 0!==r?r:e}),u),h=o&&o.count||0,f=o&&o.exhaustive||!1,l=o&&o.path||"";return{type:"hierarchical",attributeName:t,name:l,count:h,exhaustive:f}}(e,n,i,t.hierarchicalFacets))}))})),Object.keys(e.numericRefinements).forEach((function(t){var n=e.numericRefinements[t];Object.keys(n).forEach((function(e){n[e].forEach((function(n){r.push({type:"numeric",attributeName:t,name:n,numericValue:n,operator:e})}))}))})),e.tagRefinements.forEach((function(e){r.push({type:"tag",attributeName:"_tags",name:e})})),r},e.exports=v},49374:(e,t,r)=>{"use strict";var n=r(17331),i=r(68078),a=r(94039).escapeFacetValue,s=r(14853),c=r(60185),u=r(90116),o=r(49803),h=r(96394),f=r(17775),l=r(23076),m=r(24336);function d(e,t,r){"function"==typeof e.addAlgoliaAgent&&e.addAlgoliaAgent("JS Helper ("+m+")"),this.setClient(e);var n=r||{};n.index=t,this.state=f.make(n),this.lastResults=null,this._queryId=0,this._lastQueryIdReceived=-1,this.derivedHelpers=[],this._currentNbQueries=0}function p(e){if(e<0)throw new Error("Page requested below 0.");return this._change({state:this.state.setPage(e),isPageReset:!1}),this}function v(){return this.state.page}s(d,n),d.prototype.search=function(){return 
this._search({onlyWithDerivedHelpers:!1}),this},d.prototype.searchOnlyWithDerivedHelpers=function(){return this._search({onlyWithDerivedHelpers:!0}),this},d.prototype.getQuery=function(){var e=this.state;return h._getHitsSearchParams(e)},d.prototype.searchOnce=function(e,t){var r=e?this.state.setQueryParameters(e):this.state,n=h._getQueries(r.index,r),i=this;if(this._currentNbQueries++,this.emit("searchOnce",{state:r}),!t)return this.client.search(n).then((function(e){return i._currentNbQueries--,0===i._currentNbQueries&&i.emit("searchQueueEmpty"),{content:new l(r,e.results),state:r,_originalResponse:e}}),(function(e){throw i._currentNbQueries--,0===i._currentNbQueries&&i.emit("searchQueueEmpty"),e}));this.client.search(n).then((function(e){i._currentNbQueries--,0===i._currentNbQueries&&i.emit("searchQueueEmpty"),t(null,new l(r,e.results),r)})).catch((function(e){i._currentNbQueries--,0===i._currentNbQueries&&i.emit("searchQueueEmpty"),t(e,null,r)}))},d.prototype.findAnswers=function(e){console.warn("[algoliasearch-helper] answers is no longer supported");var t=this.state,r=this.derivedHelpers[0];if(!r)return Promise.resolve([]);var n=r.getModifiedState(t),i=c({attributesForPrediction:e.attributesForPrediction,nbHits:e.nbHits},{params:o(h._getHitsSearchParams(n),["attributesToSnippet","hitsPerPage","restrictSearchableAttributes","snippetEllipsisText"])}),a="search for answers was called, but this client does not have a function client.initIndex(index).findAnswers";if("function"!=typeof this.client.initIndex)throw new Error(a);var s=this.client.initIndex(n.index);if("function"!=typeof s.findAnswers)throw new Error(a);return s.findAnswers(n.query,e.queryLanguages,i)},d.prototype.searchForFacetValues=function(e,t,r,n){var i="function"==typeof this.client.searchForFacetValues,s="function"==typeof this.client.initIndex;if(!i&&!s&&"function"!=typeof this.client.search)throw new Error("search for facet values (searchable) was called, but this client does not have a 
function client.searchForFacetValues or client.initIndex(index).searchForFacetValues");var c=this.state.setQueryParameters(n||{}),u=c.isDisjunctiveFacet(e),o=h.getSearchForFacetQuery(e,t,r,c);this._currentNbQueries++;var f,l=this;return i?f=this.client.searchForFacetValues([{indexName:c.index,params:o}]):s?f=this.client.initIndex(c.index).searchForFacetValues(o):(delete o.facetName,f=this.client.search([{type:"facet",facet:e,indexName:c.index,params:o}]).then((function(e){return e.results[0]}))),this.emit("searchForFacetValues",{state:c,facet:e,query:t}),f.then((function(t){return l._currentNbQueries--,0===l._currentNbQueries&&l.emit("searchQueueEmpty"),(t=Array.isArray(t)?t[0]:t).facetHits.forEach((function(t){t.escapedValue=a(t.value),t.isRefined=u?c.isDisjunctiveFacetRefined(e,t.escapedValue):c.isFacetRefined(e,t.escapedValue)})),t}),(function(e){throw l._currentNbQueries--,0===l._currentNbQueries&&l.emit("searchQueueEmpty"),e}))},d.prototype.setQuery=function(e){return this._change({state:this.state.resetPage().setQuery(e),isPageReset:!0}),this},d.prototype.clearRefinements=function(e){return this._change({state:this.state.resetPage().clearRefinements(e),isPageReset:!0}),this},d.prototype.clearTags=function(){return this._change({state:this.state.resetPage().clearTags(),isPageReset:!0}),this},d.prototype.addDisjunctiveFacetRefinement=function(e,t){return this._change({state:this.state.resetPage().addDisjunctiveFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.addDisjunctiveRefine=function(){return this.addDisjunctiveFacetRefinement.apply(this,arguments)},d.prototype.addHierarchicalFacetRefinement=function(e,t){return this._change({state:this.state.resetPage().addHierarchicalFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.addNumericRefinement=function(e,t,r){return this._change({state:this.state.resetPage().addNumericRefinement(e,t,r),isPageReset:!0}),this},d.prototype.addFacetRefinement=function(e,t){return 
this._change({state:this.state.resetPage().addFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.addRefine=function(){return this.addFacetRefinement.apply(this,arguments)},d.prototype.addFacetExclusion=function(e,t){return this._change({state:this.state.resetPage().addExcludeRefinement(e,t),isPageReset:!0}),this},d.prototype.addExclude=function(){return this.addFacetExclusion.apply(this,arguments)},d.prototype.addTag=function(e){return this._change({state:this.state.resetPage().addTagRefinement(e),isPageReset:!0}),this},d.prototype.removeNumericRefinement=function(e,t,r){return this._change({state:this.state.resetPage().removeNumericRefinement(e,t,r),isPageReset:!0}),this},d.prototype.removeDisjunctiveFacetRefinement=function(e,t){return this._change({state:this.state.resetPage().removeDisjunctiveFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.removeDisjunctiveRefine=function(){return this.removeDisjunctiveFacetRefinement.apply(this,arguments)},d.prototype.removeHierarchicalFacetRefinement=function(e){return this._change({state:this.state.resetPage().removeHierarchicalFacetRefinement(e),isPageReset:!0}),this},d.prototype.removeFacetRefinement=function(e,t){return this._change({state:this.state.resetPage().removeFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.removeRefine=function(){return this.removeFacetRefinement.apply(this,arguments)},d.prototype.removeFacetExclusion=function(e,t){return this._change({state:this.state.resetPage().removeExcludeRefinement(e,t),isPageReset:!0}),this},d.prototype.removeExclude=function(){return this.removeFacetExclusion.apply(this,arguments)},d.prototype.removeTag=function(e){return this._change({state:this.state.resetPage().removeTagRefinement(e),isPageReset:!0}),this},d.prototype.toggleFacetExclusion=function(e,t){return this._change({state:this.state.resetPage().toggleExcludeFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.toggleExclude=function(){return 
this.toggleFacetExclusion.apply(this,arguments)},d.prototype.toggleRefinement=function(e,t){return this.toggleFacetRefinement(e,t)},d.prototype.toggleFacetRefinement=function(e,t){return this._change({state:this.state.resetPage().toggleFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.toggleRefine=function(){return this.toggleFacetRefinement.apply(this,arguments)},d.prototype.toggleTag=function(e){return this._change({state:this.state.resetPage().toggleTagRefinement(e),isPageReset:!0}),this},d.prototype.nextPage=function(){var e=this.state.page||0;return this.setPage(e+1)},d.prototype.previousPage=function(){var e=this.state.page||0;return this.setPage(e-1)},d.prototype.setCurrentPage=p,d.prototype.setPage=p,d.prototype.setIndex=function(e){return this._change({state:this.state.resetPage().setIndex(e),isPageReset:!0}),this},d.prototype.setQueryParameter=function(e,t){return this._change({state:this.state.resetPage().setQueryParameter(e,t),isPageReset:!0}),this},d.prototype.setState=function(e){return this._change({state:f.make(e),isPageReset:!1}),this},d.prototype.overrideStateWithoutTriggeringChangeEvent=function(e){return this.state=new f(e),this},d.prototype.hasRefinements=function(e){return!!u(this.state.getNumericRefinements(e))||(this.state.isConjunctiveFacet(e)?this.state.isFacetRefined(e):this.state.isDisjunctiveFacet(e)?this.state.isDisjunctiveFacetRefined(e):!!this.state.isHierarchicalFacet(e)&&this.state.isHierarchicalFacetRefined(e))},d.prototype.isExcluded=function(e,t){return this.state.isExcludeRefined(e,t)},d.prototype.isDisjunctiveRefined=function(e,t){return this.state.isDisjunctiveFacetRefined(e,t)},d.prototype.hasTag=function(e){return this.state.isTagRefined(e)},d.prototype.isTagRefined=function(){return this.hasTagRefinements.apply(this,arguments)},d.prototype.getIndex=function(){return this.state.index},d.prototype.getCurrentPage=v,d.prototype.getPage=v,d.prototype.getTags=function(){return 
this.state.tagRefinements},d.prototype.getRefinements=function(e){var t=[];if(this.state.isConjunctiveFacet(e))this.state.getConjunctiveRefinements(e).forEach((function(e){t.push({value:e,type:"conjunctive"})})),this.state.getExcludeRefinements(e).forEach((function(e){t.push({value:e,type:"exclude"})}));else if(this.state.isDisjunctiveFacet(e)){this.state.getDisjunctiveRefinements(e).forEach((function(e){t.push({value:e,type:"disjunctive"})}))}var r=this.state.getNumericRefinements(e);return Object.keys(r).forEach((function(e){var n=r[e];t.push({value:n,operator:e,type:"numeric"})})),t},d.prototype.getNumericRefinement=function(e,t){return this.state.getNumericRefinement(e,t)},d.prototype.getHierarchicalFacetBreadcrumb=function(e){return this.state.getHierarchicalFacetBreadcrumb(e)},d.prototype._search=function(e){var t=this.state,r=[],n=[];e.onlyWithDerivedHelpers||(n=h._getQueries(t.index,t),r.push({state:t,queriesCount:n.length,helper:this}),this.emit("search",{state:t,results:this.lastResults}));var i=this.derivedHelpers.map((function(e){var n=e.getModifiedState(t),i=n.index?h._getQueries(n.index,n):[];return r.push({state:n,queriesCount:i.length,helper:e}),e.emit("search",{state:n,results:e.lastResults}),i})),a=Array.prototype.concat.apply(n,i),s=this._queryId++;if(this._currentNbQueries++,!a.length)return Promise.resolve({results:[]}).then(this._dispatchAlgoliaResponse.bind(this,r,s));try{this.client.search(a).then(this._dispatchAlgoliaResponse.bind(this,r,s)).catch(this._dispatchAlgoliaError.bind(this,s))}catch(c){this.emit("error",{error:c})}},d.prototype._dispatchAlgoliaResponse=function(e,t,r){if(!(t0},d.prototype._change=function(e){var t=e.state,r=e.isPageReset;t!==this.state&&(this.state=t,this.emit("change",{state:this.state,results:this.lastResults,isPageReset:r}))},d.prototype.clearCache=function(){return this.client.clearCache&&this.client.clearCache(),this},d.prototype.setClient=function(e){return this.client===e||("function"==typeof 
e.addAlgoliaAgent&&e.addAlgoliaAgent("JS Helper ("+m+")"),this.client=e),this},d.prototype.getClient=function(){return this.client},d.prototype.derive=function(e){var t=new i(this,e);return this.derivedHelpers.push(t),t},d.prototype.detachDerivedHelper=function(e){var t=this.derivedHelpers.indexOf(e);if(-1===t)throw new Error("Derived helper already detached");this.derivedHelpers.splice(t,1)},d.prototype.hasPendingRequests=function(){return this._currentNbQueries>0},e.exports=d},74587:e=>{"use strict";e.exports=function(e){return Array.isArray(e)?e.filter(Boolean):[]}},52344:e=>{"use strict";e.exports=function(){return Array.prototype.slice.call(arguments).reduceRight((function(e,t){return Object.keys(Object(t)).forEach((function(r){void 0!==t[r]&&(void 0!==e[r]&&delete e[r],e[r]=t[r])})),e}),{})}},94039:e=>{"use strict";e.exports={escapeFacetValue:function(e){return"string"!=typeof e?e:String(e).replace(/^-/,"\\-")},unescapeFacetValue:function(e){return"string"!=typeof e?e:e.replace(/^\\-/,"-")}}},7888:e=>{"use strict";e.exports=function(e,t){if(Array.isArray(e))for(var r=0;r{"use strict";e.exports=function(e,t){if(!Array.isArray(e))return-1;for(var r=0;r{"use strict";var n=r(7888);e.exports=function(e,t){var r=(t||[]).map((function(e){return e.split(":")}));return e.reduce((function(e,t){var i=t.split(":"),a=n(r,(function(e){return e[0]===i[0]}));return i.length>1||!a?(e[0].push(i[0]),e[1].push(i[1]),e):(e[0].push(a[0]),e[1].push(a[1]),e)}),[[],[]])}},14853:e=>{"use strict";e.exports=function(e,t){e.prototype=Object.create(t.prototype,{constructor:{value:e,enumerable:!1,writable:!0,configurable:!0}})}},22686:e=>{"use strict";e.exports=function(e,t){return e.filter((function(r,n){return t.indexOf(r)>-1&&e.indexOf(r)===n}))}},60185:e=>{"use strict";function t(e){return"function"==typeof e||Array.isArray(e)||"[object Object]"===Object.prototype.toString.call(e)}function r(e,n){if(e===n)return e;for(var i in 
n)if(Object.prototype.hasOwnProperty.call(n,i)&&"__proto__"!==i&&"constructor"!==i){var a=n[i],s=e[i];void 0!==s&&void 0===a||(t(s)&&t(a)?e[i]=r(s,a):e[i]="object"==typeof(c=a)&&null!==c?r(Array.isArray(c)?[]:{},c):c)}var c;return e}e.exports=function(e){t(e)||(e={});for(var n=1,i=arguments.length;n{"use strict";e.exports=function(e){return e&&Object.keys(e).length>0}},49803:e=>{"use strict";e.exports=function(e,t){if(null===e)return{};var r,n,i={},a=Object.keys(e);for(n=0;n=0||(i[r]=e[r]);return i}},42148:e=>{"use strict";function t(e,t){if(e!==t){var r=void 0!==e,n=null===e,i=void 0!==t,a=null===t;if(!a&&e>t||n&&i||!r)return 1;if(!n&&e=n.length?a:"desc"===n[i]?-a:a}return e.index-r.index})),i.map((function(e){return e.value}))}},28023:e=>{"use strict";e.exports=function e(t){if("number"==typeof t)return t;if("string"==typeof t)return parseFloat(t);if(Array.isArray(t))return t.map(e);throw new Error("The value should be a number, a parsable string or an array of those.")}},96394:(e,t,r)=>{"use strict";var n=r(60185);function i(e){return Object.keys(e).sort().reduce((function(t,r){return t[r]=e[r],t}),{})}var a={_getQueries:function(e,t){var r=[];return r.push({indexName:e,params:a._getHitsSearchParams(t)}),t.getRefinedDisjunctiveFacets().forEach((function(n){r.push({indexName:e,params:a._getDisjunctiveFacetSearchParams(t,n)})})),t.getRefinedHierarchicalFacets().forEach((function(n){var i=t.getHierarchicalFacetByName(n),s=t.getHierarchicalRefinement(n),c=t._getHierarchicalFacetSeparator(i);if(s.length>0&&s[0].split(c).length>1){var u=s[0].split(c).slice(0,-1).reduce((function(e,t,r){return e.concat({attribute:i.attributes[r],value:0===r?t:[e[e.length-1].value,t].join(c)})}),[]);u.forEach((function(n,s){var c=a._getDisjunctiveFacetSearchParams(t,n.attribute,0===s);function o(e){return i.attributes.some((function(t){return t===e.split(":")[0]}))}var h=(c.facetFilters||[]).reduce((function(e,t){if(Array.isArray(t)){var 
r=t.filter((function(e){return!o(e)}));r.length>0&&e.push(r)}return"string"!=typeof t||o(t)||e.push(t),e}),[]),f=u[s-1];c.facetFilters=s>0?h.concat(f.attribute+":"+f.value):h.length>0?h:void 0,r.push({indexName:e,params:c})}))}})),r},_getHitsSearchParams:function(e){var t=e.facets.concat(e.disjunctiveFacets).concat(a._getHitsHierarchicalFacetsAttributes(e)).sort(),r=a._getFacetFilters(e),s=a._getNumericFilters(e),c=a._getTagFilters(e),u={facets:t.indexOf("*")>-1?["*"]:t,tagFilters:c};return r.length>0&&(u.facetFilters=r),s.length>0&&(u.numericFilters=s),i(n({},e.getQueryParams(),u))},_getDisjunctiveFacetSearchParams:function(e,t,r){var s=a._getFacetFilters(e,t,r),c=a._getNumericFilters(e,t),u=a._getTagFilters(e),o={hitsPerPage:0,page:0,analytics:!1,clickAnalytics:!1};u.length>0&&(o.tagFilters=u);var h=e.getHierarchicalFacetByName(t);return o.facets=h?a._getDisjunctiveHierarchicalFacetAttribute(e,h,r):t,c.length>0&&(o.numericFilters=c),s.length>0&&(o.facetFilters=s),i(n({},e.getQueryParams(),o))},_getNumericFilters:function(e,t){if(e.numericFilters)return e.numericFilters;var r=[];return Object.keys(e.numericRefinements).forEach((function(n){var i=e.numericRefinements[n]||{};Object.keys(i).forEach((function(e){var a=i[e]||[];t!==n&&a.forEach((function(t){if(Array.isArray(t)){var i=t.map((function(t){return n+e+t}));r.push(i)}else r.push(n+e+t)}))}))})),r},_getTagFilters:function(e){return e.tagFilters?e.tagFilters:e.tagRefinements.join(",")},_getFacetFilters:function(e,t,r){var n=[],i=e.facetsRefinements||{};Object.keys(i).sort().forEach((function(e){(i[e]||[]).sort().forEach((function(t){n.push(e+":"+t)}))}));var a=e.facetsExcludes||{};Object.keys(a).sort().forEach((function(e){(a[e]||[]).sort().forEach((function(t){n.push(e+":-"+t)}))}));var s=e.disjunctiveFacetsRefinements||{};Object.keys(s).sort().forEach((function(e){var r=s[e]||[];if(e!==t&&r&&0!==r.length){var i=[];r.sort().forEach((function(t){i.push(e+":"+t)})),n.push(i)}}));var 
c=e.hierarchicalFacetsRefinements||{};return Object.keys(c).sort().forEach((function(i){var a=(c[i]||[])[0];if(void 0!==a){var s,u,o=e.getHierarchicalFacetByName(i),h=e._getHierarchicalFacetSeparator(o),f=e._getHierarchicalRootPath(o);if(t===i){if(-1===a.indexOf(h)||!f&&!0===r||f&&f.split(h).length===a.split(h).length)return;f?(u=f.split(h).length-1,a=f):(u=a.split(h).length-2,a=a.slice(0,a.lastIndexOf(h))),s=o.attributes[u]}else u=a.split(h).length-1,s=o.attributes[u];s&&n.push([s+":"+a])}})),n},_getHitsHierarchicalFacetsAttributes:function(e){return e.hierarchicalFacets.reduce((function(t,r){var n=e.getHierarchicalRefinement(r.name)[0];if(!n)return t.push(r.attributes[0]),t;var i=e._getHierarchicalFacetSeparator(r),a=n.split(i).length,s=r.attributes.slice(0,a+1);return t.concat(s)}),[])},_getDisjunctiveHierarchicalFacetAttribute:function(e,t,r){var n=e._getHierarchicalFacetSeparator(t);if(!0===r){var i=e._getHierarchicalRootPath(t),a=0;return i&&(a=i.split(n).length),[t.attributes[a]]}var s=(e.getHierarchicalRefinement(t.name)[0]||"").split(n).length-1;return t.attributes.slice(0,s+1)},getSearchForFacetQuery:function(e,t,r,s){var c=s.isDisjunctiveFacet(e)?s.clearRefinements(e):s,u={facetQuery:t,facetName:e};return"number"==typeof r&&(u.maxFacetHits=r),i(n({},a._getHitsSearchParams(c),u))}};e.exports=a},46801:e=>{"use strict";e.exports=function(e){return null!==e&&/^[a-zA-Z0-9_-]{1,64}$/.test(e)}},24336:e=>{"use strict";e.exports="3.15.0"},70290:function(e){e.exports=function(){"use strict";function e(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function t(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function r(r){for(var n=1;n=0||(i[r]=e[r]);return i}(e,t);if(Object.getOwnPropertySymbols){var 
a=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(i[r]=e[r])}return i}function i(e,t){return function(e){if(Array.isArray(e))return e}(e)||function(e,t){if(Symbol.iterator in Object(e)||"[object Arguments]"===Object.prototype.toString.call(e)){var r=[],n=!0,i=!1,a=void 0;try{for(var s,c=e[Symbol.iterator]();!(n=(s=c.next()).done)&&(r.push(s.value),!t||r.length!==t);n=!0);}catch(e){i=!0,a=e}finally{try{n||null==c.return||c.return()}finally{if(i)throw a}}return r}}(e,t)||function(){throw new TypeError("Invalid attempt to destructure non-iterable instance")}()}function a(e){return function(e){if(Array.isArray(e)){for(var t=0,r=new Array(e.length);t2&&void 0!==arguments[2]?arguments[2]:{miss:function(){return Promise.resolve()}};return Promise.resolve().then((function(){c();var t=JSON.stringify(e);return a()[t]})).then((function(e){return Promise.all([e?e.value:t(),void 0!==e])})).then((function(e){var t=i(e,2),n=t[0],a=t[1];return Promise.all([n,a||r.miss(n)])})).then((function(e){return i(e,1)[0]}))},set:function(e,t){return Promise.resolve().then((function(){var i=a();return i[JSON.stringify(e)]={timestamp:(new Date).getTime(),value:t},n().setItem(r,JSON.stringify(i)),t}))},delete:function(e){return Promise.resolve().then((function(){var t=a();delete t[JSON.stringify(e)],n().setItem(r,JSON.stringify(t))}))},clear:function(){return Promise.resolve().then((function(){n().removeItem(r)}))}}}function c(e){var t=a(e.caches),r=t.shift();return void 0===r?{get:function(e,t){var r=arguments.length>2&&void 0!==arguments[2]?arguments[2]:{miss:function(){return Promise.resolve()}};return t().then((function(e){return Promise.all([e,r.miss(e)])})).then((function(e){return i(e,1)[0]}))},set:function(e,t){return Promise.resolve(t)},delete:function(e){return Promise.resolve()},clear:function(){return Promise.resolve()}}:{get:function(e,n){var i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:{miss:function(){return 
Promise.resolve()}};return r.get(e,n,i).catch((function(){return c({caches:t}).get(e,n,i)}))},set:function(e,n){return r.set(e,n).catch((function(){return c({caches:t}).set(e,n)}))},delete:function(e){return r.delete(e).catch((function(){return c({caches:t}).delete(e)}))},clear:function(){return r.clear().catch((function(){return c({caches:t}).clear()}))}}}function u(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{serializable:!0},t={};return{get:function(r,n){var i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:{miss:function(){return Promise.resolve()}},a=JSON.stringify(r);if(a in t)return Promise.resolve(e.serializable?JSON.parse(t[a]):t[a]);var s=n(),c=i&&i.miss||function(){return Promise.resolve()};return s.then((function(e){return c(e)})).then((function(){return s}))},set:function(r,n){return t[JSON.stringify(r)]=e.serializable?JSON.stringify(n):n,Promise.resolve(n)},delete:function(e){return delete t[JSON.stringify(e)],Promise.resolve()},clear:function(){return t={},Promise.resolve()}}}function o(e){for(var t=e.length-1;t>0;t--){var r=Math.floor(Math.random()*(t+1)),n=e[t];e[t]=e[r],e[r]=n}return e}function h(e,t){return t?(Object.keys(t).forEach((function(r){e[r]=t[r](e)})),e):e}function f(e){for(var t=arguments.length,r=new Array(t>1?t-1:0),n=1;n0?n:void 0,timeout:r.timeout||t,headers:r.headers||{},queryParameters:r.queryParameters||{},cacheable:r.cacheable}}var d={Read:1,Write:2,Any:3},p=1,v=2,g=3;function y(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:p;return r(r({},e),{},{status:t,lastUpdate:Date.now()})}function R(e){return"string"==typeof e?{protocol:"https",url:e,accept:d.Any}:{protocol:e.protocol||"https",url:e.url,accept:e.accept||d.Any}}var F="GET",b="POST";function P(e,t){return Promise.all(t.map((function(t){return e.get(t,(function(){return Promise.resolve(y(t))}))}))).then((function(e){var r=e.filter((function(e){return function(e){return 
e.status===p||Date.now()-e.lastUpdate>12e4}(e)})),n=e.filter((function(e){return function(e){return e.status===g&&Date.now()-e.lastUpdate<=12e4}(e)})),i=[].concat(a(r),a(n));return{getTimeout:function(e,t){return(0===n.length&&0===e?1:n.length+3+e)*t},statelessHosts:i.length>0?i.map((function(e){return R(e)})):t}}))}function j(e,t,n,i){var s=[],c=function(e,t){if(e.method!==F&&(void 0!==e.data||void 0!==t.data)){var n=Array.isArray(e.data)?e.data:r(r({},e.data),t.data);return JSON.stringify(n)}}(n,i),u=function(e,t){var n=r(r({},e.headers),t.headers),i={};return Object.keys(n).forEach((function(e){var t=n[e];i[e.toLowerCase()]=t})),i}(e,i),o=n.method,h=n.method!==F?{}:r(r({},n.data),i.data),f=r(r(r({"x-algolia-agent":e.userAgent.value},e.queryParameters),h),i.queryParameters),l=0,m=function t(r,a){var h=r.pop();if(void 0===h)throw{name:"RetryError",message:"Unreachable hosts - your application id may be incorrect. If the error persists, contact support@algolia.com.",transporterStackTrace:O(s)};var m={data:c,headers:u,method:o,url:E(h,n.path,f),connectTimeout:a(l,e.timeouts.connect),responseTimeout:a(l,i.timeout)},d=function(e){var t={request:m,response:e,host:h,triesLeft:r.length};return s.push(t),t},p={onSuccess:function(e){return function(e){try{return JSON.parse(e.content)}catch(t){throw function(e,t){return{name:"DeserializationError",message:e,response:t}}(t.message,e)}}(e)},onRetry:function(n){var i=d(n);return n.isTimedOut&&l++,Promise.all([e.logger.info("Retryable failure",w(i)),e.hostsCache.set(h,y(h,n.isTimedOut?g:v))]).then((function(){return t(r,a)}))},onFail:function(e){throw d(e),function(e,t){var r=e.content,n=e.status,i=r;try{i=JSON.parse(r).message}catch(e){}return function(e,t,r){return{name:"ApiError",message:e,status:t,transporterStackTrace:r}}(i,n,t)}(e,O(s))}};return e.requester.send(m).then((function(e){return function(e,t){return function(e){var t=e.status;return e.isTimedOut||function(e){var 
t=e.isTimedOut,r=e.status;return!t&&0==~~r}(e)||2!=~~(t/100)&&4!=~~(t/100)}(e)?t.onRetry(e):2==~~(e.status/100)?t.onSuccess(e):t.onFail(e)}(e,p)}))};return P(e.hostsCache,t).then((function(e){return m(a(e.statelessHosts).reverse(),e.getTimeout)}))}function _(e){var t={value:"Algolia for JavaScript (".concat(e,")"),add:function(e){var r="; ".concat(e.segment).concat(void 0!==e.version?" (".concat(e.version,")"):"");return-1===t.value.indexOf(r)&&(t.value="".concat(t.value).concat(r)),t}};return t}function E(e,t,r){var n=x(r),i="".concat(e.protocol,"://").concat(e.url,"/").concat("/"===t.charAt(0)?t.substr(1):t);return n.length&&(i+="?".concat(n)),i}function x(e){return Object.keys(e).map((function(t){return f("%s=%s",t,(r=e[t],"[object Object]"===Object.prototype.toString.call(r)||"[object Array]"===Object.prototype.toString.call(r)?JSON.stringify(e[t]):e[t]));var r})).join("&")}function O(e){return e.map((function(e){return w(e)}))}function w(e){var t=e.request.headers["x-algolia-api-key"]?{"x-algolia-api-key":"*****"}:{};return r(r({},e),{},{request:r(r({},e.request),{},{headers:r(r({},e.request.headers),t)})})}var N=function(e){var t=e.appId,n=function(e,t,r){var n={"x-algolia-api-key":r,"x-algolia-application-id":t};return{headers:function(){return e===l.WithinHeaders?n:{}},queryParameters:function(){return e===l.WithinQueryParameters?n:{}}}}(void 0!==e.authMode?e.authMode:l.WithinHeaders,t,e.apiKey),a=function(e){var t=e.hostsCache,r=e.logger,n=e.requester,a=e.requestsCache,s=e.responsesCache,c=e.timeouts,u=e.userAgent,o=e.hosts,h=e.queryParameters,f={hostsCache:t,logger:r,requester:n,requestsCache:a,responsesCache:s,timeouts:c,userAgent:u,headers:e.headers,queryParameters:h,hosts:o.map((function(e){return R(e)})),read:function(e,t){var r=m(t,f.timeouts.read),n=function(){return j(f,f.hosts.filter((function(e){return 0!=(e.accept&d.Read)})),e,r)};if(!0!==(void 0!==r.cacheable?r.cacheable:e.cacheable))return n();var 
a={request:e,mappedRequestOptions:r,transporter:{queryParameters:f.queryParameters,headers:f.headers}};return f.responsesCache.get(a,(function(){return f.requestsCache.get(a,(function(){return f.requestsCache.set(a,n()).then((function(e){return Promise.all([f.requestsCache.delete(a),e])}),(function(e){return Promise.all([f.requestsCache.delete(a),Promise.reject(e)])})).then((function(e){var t=i(e,2);return t[0],t[1]}))}))}),{miss:function(e){return f.responsesCache.set(a,e)}})},write:function(e,t){return j(f,f.hosts.filter((function(e){return 0!=(e.accept&d.Write)})),e,m(t,f.timeouts.write))}};return f}(r(r({hosts:[{url:"".concat(t,"-dsn.algolia.net"),accept:d.Read},{url:"".concat(t,".algolia.net"),accept:d.Write}].concat(o([{url:"".concat(t,"-1.algolianet.com")},{url:"".concat(t,"-2.algolianet.com")},{url:"".concat(t,"-3.algolianet.com")}]))},e),{},{headers:r(r(r({},n.headers()),{"content-type":"application/x-www-form-urlencoded"}),e.headers),queryParameters:r(r({},n.queryParameters()),e.queryParameters)}));return h({transporter:a,appId:t,addAlgoliaAgent:function(e,t){a.userAgent.add({segment:e,version:t})},clearCache:function(){return Promise.all([a.requestsCache.clear(),a.responsesCache.clear()]).then((function(){}))}},e.methods)},A=function(e){return function(t,r){return t.method===F?e.transporter.read(t,r):e.transporter.write(t,r)}},H=function(e){return function(t){var r=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};return h({transporter:e.transporter,appId:e.appId,indexName:t},r.methods)}},S=function(e){return function(t,n){var i=t.map((function(e){return r(r({},e),{},{params:x(e.params||{})})}));return e.transporter.read({method:b,path:"1/indexes/*/queries",data:{requests:i},cacheable:!0},n)}},T=function(e){return function(t,i){return Promise.all(t.map((function(t){var a=t.params,s=a.facetName,c=a.facetQuery,u=n(a,["facetName","facetQuery"]);return 
H(e)(t.indexName,{methods:{searchForFacetValues:k}}).searchForFacetValues(s,c,r(r({},i),u))})))}},Q=function(e){return function(t,r,n){return e.transporter.read({method:b,path:f("1/answers/%s/prediction",e.indexName),data:{query:t,queryLanguages:r},cacheable:!0},n)}},C=function(e){return function(t,r){return e.transporter.read({method:b,path:f("1/indexes/%s/query",e.indexName),data:{query:t},cacheable:!0},r)}},k=function(e){return function(t,r,n){return e.transporter.read({method:b,path:f("1/indexes/%s/facets/%s/query",e.indexName,t),data:{facetQuery:r},cacheable:!0},n)}},I=1,D=2,q=3;function L(e,t,n){var i,a={appId:e,apiKey:t,timeouts:{connect:1,read:2,write:30},requester:{send:function(e){return new Promise((function(t){var r=new XMLHttpRequest;r.open(e.method,e.url,!0),Object.keys(e.headers).forEach((function(t){return r.setRequestHeader(t,e.headers[t])}));var n,i=function(e,n){return setTimeout((function(){r.abort(),t({status:0,content:n,isTimedOut:!0})}),1e3*e)},a=i(e.connectTimeout,"Connection timeout");r.onreadystatechange=function(){r.readyState>r.OPENED&&void 0===n&&(clearTimeout(a),n=i(e.responseTimeout,"Socket timeout"))},r.onerror=function(){0===r.status&&(clearTimeout(a),clearTimeout(n),t({content:r.responseText||"Network request failed",status:r.status,isTimedOut:!1}))},r.onload=function(){clearTimeout(a),clearTimeout(n),t({content:r.responseText,status:r.status,isTimedOut:!1})},r.send(e.data)}))}},logger:(i=q,{debug:function(e,t){return I>=i&&console.debug(e,t),Promise.resolve()},info:function(e,t){return D>=i&&console.info(e,t),Promise.resolve()},error:function(e,t){return console.error(e,t),Promise.resolve()}}),responsesCache:u(),requestsCache:u({serializable:!1}),hostsCache:c({caches:[s({key:"".concat("4.20.0","-").concat(e)}),u()]}),userAgent:_("4.20.0").add({segment:"Browser",version:"lite"}),authMode:l.WithinQueryParameters};return 
N(r(r(r({},a),n),{},{methods:{search:S,searchForFacetValues:T,multipleQueries:S,multipleSearchForFacetValues:T,customRequest:A,initIndex:function(e){return function(t){return H(e)(t,{methods:{search:C,searchForFacetValues:k,findAnswers:Q}})}}}}))}return L.version="4.20.0",L}()},39172:(e,t,r)=>{"use strict";r.r(t),r.d(t,{default:()=>F});var n=r(67294),i=r(70290),a=r.n(i),s=r(8131),c=r.n(s),u=r(86010),o=r(12859),h=r(39960),f=r(10412),l=r(53810),m=r(52263),d=r(80907),p=r(99565),v=r(18882),g=r(95999);const y={searchQueryInput:"searchQueryInput_dLdO",searchVersionInput:"searchVersionInput_oJeg",searchResultsColumn:"searchResultsColumn_V1kT",algoliaLogo:"algoliaLogo_ieE9",algoliaLogoPathFill:"algoliaLogoPathFill_NLBU",searchResultItem:"searchResultItem_f0c5",searchResultItemHeading:"searchResultItemHeading_59Ih",searchResultItemPath:"searchResultItemPath_utd2",searchResultItemSummary:"searchResultItemSummary_EzNh",searchQueryColumn:"searchQueryColumn_qeTZ",searchVersionColumn:"searchVersionColumn_2Kfj",searchLogoColumn:"searchLogoColumn_8GYL",loadingSpinner:"loadingSpinner_CN74",loadingspin:"loadingspin_ANjV",loader:"loader_-Se+"};function R(e){let{docsSearchVersionsHelpers:t}=e;const r=Object.entries(t.allDocsData).filter((e=>{let[,t]=e;return t.versions.length>1}));return n.createElement("div",{className:(0,u.Z)("col","col--3","padding-left--none",y.searchVersionColumn)},r.map((e=>{let[i,a]=e;const s=r.length>1?`${i}: `:"";return n.createElement("select",{key:i,onChange:e=>t.setSearchVersion(i,e.target.value),defaultValue:t.searchVersions[i],className:y.searchVersionInput},a.versions.map(((e,t)=>n.createElement("option",{key:t,label:`${s}${e.label}`,value:e.name}))))})))}const F=function(){const{siteConfig:{themeConfig:{algolia:{appId:e,apiKey:t,indexName:r,externalUrlRegex:i}}},i18n:{currentLocale:s}}=(0,m.Z)(),F=function(){const{selectMessage:e}=(0,l.c2)();return t=>e(t,(0,g.I)({id:"theme.SearchPage.documentsFound.plurals",description:'Pluralized label for "{count} 
documents found". Use as much plural forms (separated by "|") as your language support (see https://www.unicode.org/cldr/cldr-aux/charts/34/supplemental/language_plural_rules.html)',message:"One document found|{count} documents found"},{count:t}))}(),b=function(){const e=(0,d._r)(),[t,r]=(0,n.useState)((()=>Object.entries(e).reduce(((e,t)=>{let[r,n]=t;return{...e,[r]:n.versions[0].name}}),{}))),i=Object.values(e).some((e=>e.versions.length>1));return{allDocsData:e,versioningEnabled:i,searchVersions:t,setSearchVersion:(e,t)=>r((r=>({...r,[e]:t})))}}(),{searchQuery:P,setSearchQuery:j}=(0,p.Z)(),_={items:[],query:null,totalResults:null,totalPages:null,lastPage:null,hasMore:null,loading:null},[E,x]=(0,n.useReducer)(((e,t)=>{switch(t.type){case"reset":return _;case"loading":return{...e,loading:!0};case"update":return P!==t.value.query?e:{...t.value,items:0===t.value.lastPage?t.value.items:e.items.concat(t.value.items)};case"advance":{const t=e.totalPages>e.lastPage+1;return{...e,lastPage:t?e.lastPage+1:e.lastPage,hasMore:t}}default:return e}}),_),O=a()(e,t),w=c()(O,r,{hitsPerPage:15,advancedSyntax:!0,disjunctiveFacets:["language","docusaurus_tag"]});w.on("result",(e=>{let{results:{query:t,hits:r,page:n,nbHits:a,nbPages:s}}=e;if(""===t||!(r instanceof Array))return void x({type:"reset"});const c=e=>e.replace(/algolia-docsearch-suggestion--highlight/g,"search-result-match"),u=r.map((e=>{let{url:t,_highlightResult:{hierarchy:r},_snippetResult:n={}}=e;const a=new URL(t),s=Object.keys(r).map((e=>c(r[e].value)));return{title:s.pop(),url:(0,l.Fx)(i,a.href)?a.href:a.pathname+a.hash,summary:n.content?`${c(n.content.value)}...`:"",breadcrumbs:s}}));x({type:"update",value:{items:u,query:t,totalResults:a,totalPages:s,lastPage:n,hasMore:s>n+1,loading:!1}})}));const[N,A]=(0,n.useState)(null),H=(0,n.useRef)(0),S=(0,n.useRef)(f.Z.canUseDOM&&new 
IntersectionObserver((e=>{const{isIntersecting:t,boundingClientRect:{y:r}}=e[0];t&&H.current>r&&x({type:"advance"}),H.current=r}),{threshold:1})),T=()=>P?(0,g.I)({id:"theme.SearchPage.existingResultsTitle",message:'Search results for "{query}"',description:"The search page title for non-empty query"},{query:P}):(0,g.I)({id:"theme.SearchPage.emptyResultsTitle",message:"Search the documentation",description:"The search page title for empty query"}),Q=(0,l.ed)((function(e){void 0===e&&(e=0),w.addDisjunctiveFacetRefinement("docusaurus_tag","default"),w.addDisjunctiveFacetRefinement("language",s),Object.entries(b.searchVersions).forEach((e=>{let[t,r]=e;w.addDisjunctiveFacetRefinement("docusaurus_tag",`docs-${t}-${r}`)})),w.setQuery(P).setPage(e).search()}));return(0,n.useEffect)((()=>{if(!N)return;const e=S.current;return e?(e.observe(N),()=>e.unobserve(N)):()=>!0}),[N]),(0,n.useEffect)((()=>{x({type:"reset"}),P&&(x({type:"loading"}),setTimeout((()=>{Q()}),300))}),[P,b.searchVersions,Q]),(0,n.useEffect)((()=>{E.lastPage&&0!==E.lastPage&&Q(E.lastPage)}),[Q,E.lastPage]),n.createElement(v.Z,{wrapperClassName:"search-page-wrapper"},n.createElement(o.Z,null,n.createElement("title",null,(0,l.pe)(T())),n.createElement("meta",{property:"robots",content:"noindex, follow"})),n.createElement("div",{className:"container margin-vert--lg"},n.createElement("h1",null,T()),n.createElement("form",{className:"row",onSubmit:e=>e.preventDefault()},n.createElement("div",{className:(0,u.Z)("col",y.searchQueryColumn,{"col--9":b.versioningEnabled,"col--12":!b.versioningEnabled})},n.createElement("input",{type:"search",name:"q",className:y.searchQueryInput,placeholder:(0,g.I)({id:"theme.SearchPage.inputPlaceholder",message:"Type your search here",description:"The placeholder for search page input"}),"aria-label":(0,g.I)({id:"theme.SearchPage.inputLabel",message:"Search",description:"The ARIA label for search page 
input"}),onChange:e=>j(e.target.value),value:P,autoComplete:"off",autoFocus:!0})),b.versioningEnabled&&n.createElement(R,{docsSearchVersionsHelpers:b})),n.createElement("div",{className:"row"},n.createElement("div",{className:(0,u.Z)("col","col--8",y.searchResultsColumn)},!!E.totalResults&&F(E.totalResults)),n.createElement("div",{className:(0,u.Z)("col","col--4","text--right",y.searchLogoColumn)},n.createElement("a",{target:"_blank",rel:"noopener noreferrer",href:"https://www.algolia.com/","aria-label":(0,g.I)({id:"theme.SearchPage.algoliaLabel",message:"Search by Algolia",description:"The ARIA label for Algolia mention"})},n.createElement("svg",{viewBox:"0 0 168 24",className:y.algoliaLogo},n.createElement("g",{fill:"none"},n.createElement("path",{className:y.algoliaLogoPathFill,d:"M120.925 18.804c-4.386.02-4.386-3.54-4.386-4.106l-.007-13.336 2.675-.424v13.254c0 .322 0 2.358 1.718 2.364v2.248zm-10.846-2.18c.821 0 1.43-.047 1.855-.129v-2.719a6.334 6.334 0 0 0-1.574-.199 5.7 5.7 0 0 0-.897.069 2.699 2.699 0 0 0-.814.24c-.24.116-.439.28-.582.491-.15.212-.219.335-.219.656 0 .628.219.991.616 1.23s.938.362 1.615.362zm-.233-9.7c.883 0 1.629.109 2.231.328.602.218 1.088.525 1.444.915.363.396.609.922.76 1.483.157.56.232 1.175.232 1.85v6.874a32.5 32.5 0 0 1-1.868.314c-.834.123-1.772.185-2.813.185-.69 0-1.327-.069-1.895-.198a4.001 4.001 0 0 1-1.471-.636 3.085 3.085 0 0 1-.951-1.134c-.226-.465-.343-1.12-.343-1.803 0-.656.13-1.073.384-1.525a3.24 3.24 0 0 1 1.047-1.106c.445-.287.95-.492 1.532-.615a8.8 8.8 0 0 1 1.82-.185 8.404 8.404 0 0 1 1.972.24v-.438c0-.307-.035-.6-.11-.874a1.88 1.88 0 0 0-.384-.73 1.784 1.784 0 0 0-.724-.493 3.164 3.164 0 0 0-1.143-.205c-.616 0-1.177.075-1.69.164a7.735 7.735 0 0 0-1.26.307l-.321-2.192c.335-.117.834-.233 1.478-.349a10.98 10.98 0 0 1 2.073-.178zm52.842 9.626c.822 0 1.43-.048 1.854-.13V13.7a6.347 6.347 0 0 0-1.574-.199c-.294 0-.595.021-.896.069a2.7 2.7 0 0 0-.814.24 1.46 1.46 0 0 0-.582.491c-.15.212-.218.335-.218.656 0 .628.218.991.615 
1.23.404.245.938.362 1.615.362zm-.226-9.694c.883 0 1.629.108 2.231.327.602.219 1.088.526 1.444.915.355.39.609.923.759 1.483a6.8 6.8 0 0 1 .233 1.852v6.873c-.41.088-1.034.19-1.868.314-.834.123-1.772.184-2.813.184-.69 0-1.327-.068-1.895-.198a4.001 4.001 0 0 1-1.471-.635 3.085 3.085 0 0 1-.951-1.134c-.226-.465-.343-1.12-.343-1.804 0-.656.13-1.073.384-1.524.26-.45.608-.82 1.047-1.107.445-.286.95-.491 1.532-.614a8.803 8.803 0 0 1 2.751-.13c.329.034.671.096 1.04.185v-.437a3.3 3.3 0 0 0-.109-.875 1.873 1.873 0 0 0-.384-.731 1.784 1.784 0 0 0-.724-.492 3.165 3.165 0 0 0-1.143-.205c-.616 0-1.177.075-1.69.164a7.75 7.75 0 0 0-1.26.307l-.321-2.193c.335-.116.834-.232 1.478-.348a11.633 11.633 0 0 1 2.073-.177zm-8.034-1.271a1.626 1.626 0 0 1-1.628-1.62c0-.895.725-1.62 1.628-1.62.904 0 1.63.725 1.63 1.62 0 .895-.733 1.62-1.63 1.62zm1.348 13.22h-2.689V7.27l2.69-.423v11.956zm-4.714 0c-4.386.02-4.386-3.54-4.386-4.107l-.008-13.336 2.676-.424v13.254c0 .322 0 2.358 1.718 2.364v2.248zm-8.698-5.903c0-1.156-.253-2.119-.746-2.788-.493-.677-1.183-1.01-2.067-1.01-.882 0-1.574.333-2.065 1.01-.493.676-.733 1.632-.733 2.788 0 1.168.246 1.953.74 2.63.492.683 1.183 1.018 2.066 1.018.882 0 1.574-.342 2.067-1.019.492-.683.738-1.46.738-2.63zm2.737-.007c0 .902-.13 1.584-.397 2.33a5.52 5.52 0 0 1-1.128 1.906 4.986 4.986 0 0 1-1.752 1.223c-.685.286-1.739.45-2.265.45-.528-.006-1.574-.157-2.252-.45a5.096 5.096 0 0 1-1.744-1.223c-.487-.527-.863-1.162-1.137-1.906a6.345 6.345 0 0 1-.41-2.33c0-.902.123-1.77.397-2.508a5.554 5.554 0 0 1 1.15-1.892 5.133 5.133 0 0 1 1.75-1.216c.679-.287 1.425-.423 2.232-.423.808 0 1.553.142 2.237.423a4.88 4.88 0 0 1 1.753 1.216 5.644 5.644 0 0 1 1.135 1.892c.287.738.431 1.606.431 2.508zm-20.138 0c0 1.12.246 2.363.738 2.882.493.52 1.13.78 1.91.78.424 0 .828-.062 1.204-.178.377-.116.677-.253.917-.417V9.33a10.476 10.476 0 0 0-1.766-.226c-.971-.028-1.71.37-2.23 1.004-.513.636-.773 1.75-.773 2.788zm7.438 5.274c0 1.824-.466 3.156-1.404 4.004-.936.846-2.367 1.27-4.296 1.27-.705 
0-2.17-.137-3.34-.396l.431-2.118c.98.205 2.272.26 2.95.26 1.074 0 1.84-.219 2.299-.656.459-.437.684-1.086.684-1.948v-.437a8.07 8.07 0 0 1-1.047.397c-.43.13-.93.198-1.492.198-.739 0-1.41-.116-2.018-.349a4.206 4.206 0 0 1-1.567-1.025c-.431-.45-.774-1.017-1.013-1.694-.24-.677-.363-1.885-.363-2.773 0-.834.13-1.88.384-2.577.26-.696.629-1.298 1.129-1.796.493-.498 1.095-.881 1.8-1.162a6.605 6.605 0 0 1 2.428-.457c.87 0 1.67.109 2.45.24.78.129 1.444.265 1.985.415V18.17zM6.972 6.677v1.627c-.712-.446-1.52-.67-2.425-.67-.585 0-1.045.13-1.38.391a1.24 1.24 0 0 0-.502 1.03c0 .425.164.765.494 1.02.33.256.835.532 1.516.83.447.192.795.356 1.045.495.25.138.537.332.862.582.324.25.563.548.718.894.154.345.23.741.23 1.188 0 .947-.334 1.691-1.004 2.234-.67.542-1.537.814-2.601.814-1.18 0-2.16-.229-2.936-.686v-1.708c.84.628 1.814.942 2.92.942.585 0 1.048-.136 1.388-.407.34-.271.51-.646.51-1.125 0-.287-.1-.55-.302-.79-.203-.24-.42-.42-.655-.542-.234-.123-.585-.29-1.053-.503a61.27 61.27 0 0 1-.582-.271 13.67 13.67 0 0 1-.55-.287 4.275 4.275 0 0 1-.567-.351 6.92 6.92 0 0 1-.455-.4c-.18-.17-.31-.34-.39-.51-.08-.17-.155-.37-.224-.598a2.553 2.553 0 0 1-.104-.742c0-.915.333-1.638.998-2.17.664-.532 1.523-.798 2.576-.798.968 0 1.793.17 2.473.51zm7.468 5.696v-.287c-.022-.607-.187-1.088-.495-1.444-.309-.357-.75-.535-1.324-.535-.532 0-.99.194-1.373.583-.382.388-.622.949-.717 1.683h3.909zm1.005 2.792v1.404c-.596.34-1.383.51-2.362.51-1.255 0-2.255-.377-3-1.132-.744-.755-1.116-1.744-1.116-2.968 0-1.297.34-2.316 1.021-3.055.68-.74 1.548-1.11 2.6-1.11 1.033 0 1.852.323 2.458.966.606.644.91 1.572.91 2.784 0 .33-.033.676-.096 1.038h-5.314c.107.702.405 1.239.894 1.611.49.372 1.106.558 1.85.558.862 0 1.58-.202 2.155-.606zm6.605-1.77h-1.212c-.596 0-1.045.116-1.349.35-.303.234-.454.532-.454.894 0 .372.117.664.35.877.235.213.575.32 1.022.32.51 0 .912-.142 1.204-.424.293-.281.44-.651.44-1.108v-.91zm-4.068-2.554V9.325c.627-.361 1.457-.542 2.489-.542 2.116 0 3.175 1.026 3.175 3.08V17h-1.548v-.957c-.415.68-1.143 
1.02-2.186 1.02-.766 0-1.38-.22-1.843-.661-.462-.442-.694-1.003-.694-1.684 0-.776.293-1.38.878-1.81.585-.431 1.404-.647 2.457-.647h1.34V11.8c0-.554-.133-.971-.399-1.253-.266-.282-.707-.423-1.324-.423a4.07 4.07 0 0 0-2.345.718zm9.333-1.93v1.42c.394-1 1.101-1.5 2.123-1.5.148 0 .313.016.494.048v1.531a1.885 1.885 0 0 0-.75-.143c-.542 0-.989.24-1.34.718-.351.479-.527 1.048-.527 1.707V17h-1.563V8.91h1.563zm5.01 4.084c.022.82.272 1.492.75 2.019.479.526 1.15.79 2.01.79.639 0 1.235-.176 1.788-.527v1.404c-.521.319-1.186.479-1.995.479-1.265 0-2.276-.4-3.031-1.197-.755-.798-1.133-1.792-1.133-2.984 0-1.16.38-2.151 1.14-2.975.761-.825 1.79-1.237 3.088-1.237.702 0 1.346.149 1.93.447v1.436a3.242 3.242 0 0 0-1.77-.495c-.84 0-1.513.266-2.019.798-.505.532-.758 1.213-.758 2.042zM40.24 5.72v4.579c.458-1 1.293-1.5 2.505-1.5.787 0 1.42.245 1.899.734.479.49.718 1.17.718 2.042V17h-1.564v-5.106c0-.553-.14-.98-.422-1.284-.282-.303-.652-.455-1.11-.455-.531 0-1.002.202-1.411.606-.41.405-.615 1.022-.615 1.851V17h-1.563V5.72h1.563zm14.966 10.02c.596 0 1.096-.253 1.5-.758.404-.506.606-1.157.606-1.955 0-.915-.202-1.62-.606-2.114-.404-.495-.92-.742-1.548-.742-.553 0-1.05.224-1.491.67-.442.447-.662 1.133-.662 2.058 0 .958.212 1.67.638 2.138.425.469.946.703 1.563.703zM53.004 5.72v4.42c.574-.894 1.388-1.341 2.44-1.341 1.022 0 1.857.383 2.506 1.149.649.766.973 1.781.973 3.047 0 1.138-.309 2.109-.925 2.912-.617.803-1.463 1.205-2.537 1.205-1.075 0-1.894-.447-2.457-1.34V17h-1.58V5.72h1.58zm9.908 11.104l-3.223-7.913h1.739l1.005 2.632 1.26 3.415c.096-.32.48-1.458 1.15-3.415l.909-2.632h1.66l-2.92 7.866c-.777 2.074-1.963 3.11-3.559 3.11a2.92 2.92 0 0 1-.734-.079v-1.34c.17.042.351.064.543.064 1.032 0 1.755-.57 2.17-1.708z"}),n.createElement("path",{fill:"#5468FF",d:"M78.988.938h16.594a2.968 2.968 0 0 1 2.966 2.966V20.5a2.967 2.967 0 0 1-2.966 2.964H78.988a2.967 2.967 0 0 1-2.966-2.964V3.897A2.961 2.961 0 0 1 78.988.938z"}),n.createElement("path",{fill:"white",d:"M89.632 5.967v-.772a.978.978 0 0 
0-.978-.977h-2.28a.978.978 0 0 0-.978.977v.793c0 .088.082.15.171.13a7.127 7.127 0 0 1 1.984-.28c.65 0 1.295.088 1.917.259.082.02.164-.04.164-.13m-6.248 1.01l-.39-.389a.977.977 0 0 0-1.382 0l-.465.465a.973.973 0 0 0 0 1.38l.383.383c.062.061.15.047.205-.014.226-.307.472-.601.746-.874.281-.28.568-.526.883-.751.068-.042.075-.137.02-.2m4.16 2.453v3.341c0 .096.104.165.192.117l2.97-1.537c.068-.034.089-.117.055-.184a3.695 3.695 0 0 0-3.08-1.866c-.068 0-.136.054-.136.13m0 8.048a4.489 4.489 0 0 1-4.49-4.482 4.488 4.488 0 0 1 4.49-4.482 4.488 4.488 0 0 1 4.489 4.482 4.484 4.484 0 0 1-4.49 4.482m0-10.85a6.363 6.363 0 1 0 0 12.729 6.37 6.37 0 0 0 6.372-6.368 6.358 6.358 0 0 0-6.371-6.36"})))))),E.items.length>0?n.createElement("main",null,E.items.map(((e,t)=>{let{title:r,url:i,summary:a,breadcrumbs:s}=e;return n.createElement("article",{key:t,className:y.searchResultItem},n.createElement("h2",{className:y.searchResultItemHeading},n.createElement(h.Z,{to:i,dangerouslySetInnerHTML:{__html:r}})),s.length>0&&n.createElement("nav",{"aria-label":"breadcrumbs"},n.createElement("ul",{className:(0,u.Z)("breadcrumbs",y.searchResultItemPath)},s.map(((e,t)=>n.createElement("li",{key:t,className:"breadcrumbs__item",dangerouslySetInnerHTML:{__html:e}}))))),a&&n.createElement("p",{className:y.searchResultItemSummary,dangerouslySetInnerHTML:{__html:a}}))}))):[P&&!E.loading&&n.createElement("p",{key:"no-results"},n.createElement(g.Z,{id:"theme.SearchPage.noResultsText",description:"The paragraph for empty search result"},"No results were found")),!!E.loading&&n.createElement("div",{key:"spinner",className:y.loadingSpinner})],E.hasMore&&n.createElement("div",{className:y.loader,ref:A},n.createElement(g.Z,{id:"theme.SearchPage.fetchingNewResults",description:"The paragraph for fetching new search results"},"Fetching new results..."))))}}}]); \ No newline at end of file diff --git a/assets/js/4608.0c317238.js b/assets/js/4608.0c317238.js deleted file mode 100644 index 40ec456c..00000000 --- 
a/assets/js/4608.0c317238.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4608],{24608:(e,t,n)=>{n.r(t),n.d(t,{default:()=>i});var a=n(67294),o=n(18882),l=n(95999);const i=function(){return a.createElement(o.Z,{title:(0,l.I)({id:"theme.NotFound.title",message:"Page Not Found"})},a.createElement("main",{className:"container margin-vert--xl"},a.createElement("div",{className:"row"},a.createElement("div",{className:"col col--6 col--offset-3"},a.createElement("h1",{className:"hero__title"},a.createElement(l.Z,{id:"theme.NotFound.title",description:"The title of the 404 page"},"Page Not Found")),a.createElement("p",null,a.createElement(l.Z,{id:"theme.NotFound.p1",description:"The first paragraph of the 404 page"},"We could not find what you were looking for.")),a.createElement("p",null,a.createElement(l.Z,{id:"theme.NotFound.p2",description:"The 2nd paragraph of the 404 page"},"Please contact the owner of the site that linked you to the original URL and let them know their link is broken."))))))}}}]); \ No newline at end of file diff --git a/assets/js/4608.99c6c795.js b/assets/js/4608.99c6c795.js new file mode 100644 index 00000000..abb7502a --- /dev/null +++ b/assets/js/4608.99c6c795.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4608],{4608:(e,t,n)=>{n.r(t),n.d(t,{default:()=>i});var a=n(7294),o=n(8882),l=n(5999);const i=function(){return a.createElement(o.Z,{title:(0,l.I)({id:"theme.NotFound.title",message:"Page Not Found"})},a.createElement("main",{className:"container margin-vert--xl"},a.createElement("div",{className:"row"},a.createElement("div",{className:"col col--6 col--offset-3"},a.createElement("h1",{className:"hero__title"},a.createElement(l.Z,{id:"theme.NotFound.title",description:"The title of the 404 page"},"Page Not 
Found")),a.createElement("p",null,a.createElement(l.Z,{id:"theme.NotFound.p1",description:"The first paragraph of the 404 page"},"We could not find what you were looking for.")),a.createElement("p",null,a.createElement(l.Z,{id:"theme.NotFound.p2",description:"The 2nd paragraph of the 404 page"},"Please contact the owner of the site that linked you to the original URL and let them know their link is broken."))))))}}}]); \ No newline at end of file diff --git a/assets/js/463e69e4.dfb2fdb9.js b/assets/js/463e69e4.dfb2fdb9.js deleted file mode 100644 index ed6c38ff..00000000 --- a/assets/js/463e69e4.dfb2fdb9.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7278],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>h});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var c=a.createContext({}),s=function(e){var t=a.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=s(e.components);return a.createElement(c.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,c=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=s(n),m=r,h=u["".concat(c,".").concat(m)]||u[m]||d[m]||i;return 
n?a.createElement(h,o(o({ref:t},p),{},{components:n})):a.createElement(h,o({ref:t},p))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=m;var l={};for(var c in t)hasOwnProperty.call(t,c)&&(l[c]=t[c]);l.originalType=e,l[u]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>i,metadata:()=>l,toc:()=>c});var a=n(87462),r=(n(67294),n(3905));const i={title:"Canonical Transcripts"},o=void 0,l={unversionedId:"core-functionality/canonical-transcripts",id:"core-functionality/canonical-transcripts",title:"Canonical Transcripts",description:"Overview",source:"@site/docs/core-functionality/canonical-transcripts.md",sourceDirName:"core-functionality",slug:"/core-functionality/canonical-transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/core-functionality/canonical-transcripts.md",tags:[],version:"current",frontMatter:{title:"Canonical Transcripts"},sidebar:"docs",previous:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations"},next:{title:"Transcript Consequence Impact",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"Known Algorithms",id:"known-algorithms",children:[{value:"UCSC",id:"ucsc",children:[],level:3},{value:"Ensembl",id:"ensembl",children:[],level:3},{value:"ACMG",id:"acmg",children:[],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3}],level:2},{value:"Unified Approach",id:"unified-approach",children:[],level:2}],s={toc:c},p="wrapper";function u(e){let{components:t,...i}=e;return(0,r.kt)(p,(0,a.Z)({},s,i,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"One of the 
more polarizing topics within annotation is the notion of canonical transcripts. Because of alternative splicing, we often have several transcripts for each gene. In the human genome, there are an average of 3.4 transcripts per gene (Tung, 2020). As scientists, we seem to have a need for identifying a representative example of a gene - even if there's no biological basis for the motivation."),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(73424).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Golden Helix Blog")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"A few years ago, the guys over at Golden Helix wrote an excellent post about the pitfalls and issues surrounding the identification of canonical transcripts: ",(0,r.kt)("a",{parentName:"p",href:"https://blog.goldenhelix.com/whats-in-a-name-the-intricacies-of-identifying-variants/"},"What\u2019s in a Name: The Intricacies of Identifying Variants"),"."))),(0,r.kt)("p",null,"In Illumina Connected Annotations, we wanted to identify an algorithm for determining the canonical transcript and apply it consistently to all of our transcript data sources."),(0,r.kt)("h2",{id:"known-algorithms"},"Known Algorithms"),(0,r.kt)("h3",{id:"ucsc"},"UCSC"),(0,r.kt)("p",null,"UCSC publishes a list of canonical transcripts in its ",(0,r.kt)("inlineCode",{parentName:"p"},"knownCanonical")," table which is available via the 
",(0,r.kt)("a",{parentName:"p",href:"https://genome.ucsc.edu/cgi-bin/hgTables"},"TableBrowser"),". Of the RefSeq data sources, it was the only one we could find that provided canonical transcripts:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is defined as either the longest CDS, if the gene has translated transcripts, or the longest cDNA.")),(0,r.kt)("p",null,"If you were to implement this and compare it with the knownCanonical table, you would see a lot of exceptions to the rule."),(0,r.kt)("h3",{id:"ensembl"},"Ensembl"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"http://uswest.ensembl.org/Help/Glossary"},"Ensembl glossary")," states:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is used in the gene tree analysis in Ensembl and does not necessarily reflect the most biologically relevant transcript of a gene. For human, the canonical transcript for a gene is set according to the following hierarchy:"),(0,r.kt)("ol",{parentName:"blockquote"},(0,r.kt)("li",{parentName:"ol"},"Longest CCDS translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (1), choose the longest Ensembl/Havana merged translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (2), choose the longest translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no translation, choose the longest non-protein-coding transcript."))),(0,r.kt)("h3",{id:"acmg"},"ACMG"),(0,r.kt)("p",null,"From the ACMG Guidelines for the Interpretation of Sequence Variants:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"A reference transcript for each gene should be used and provided in the report when describing coding variants. 
The transcript should represent either the longest known transcript and/or the most clinically relevant transcript.")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)("p",null,"From the ClinVar paper:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"When there are multiple transcripts for a gene, ClinVar selects one HGVS expression to construct a preferred name. By default, this selection is based on the first reference standard transcript identified by the RefSeqGene/LRG (Locus Reference Genomic) collaboration.")),(0,r.kt)("h2",{id:"unified-approach"},"Unified Approach"),(0,r.kt)("p",null,"Our approach is almost identical to the one Golden Helix discussed in their article:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"If we're looking at RefSeq, only consider NM & NR transcripts as candidates for canonical transcripts."),(0,r.kt)("li",{parentName:"ol"},"Sort the transcripts in the following order:",(0,r.kt)("ol",{parentName:"li"},(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://www.lrg-sequence.org/"},"Locus Reference Genomic (LRG)")," entries occur before non-LRG entries"),(0,r.kt)("li",{parentName:"ol"},"Descending CDS length"),(0,r.kt)("li",{parentName:"ol"},"Descending transcript length"),(0,r.kt)("li",{parentName:"ol"},"Ascending accession number"))),(0,r.kt)("li",{parentName:"ol"},"Grab the first entry")))}u.isMDXComponent=!0},73424:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/hk1-transcripts-a5b85474d3b002553687715dbd004907.png"}}]); \ No newline at end of file diff --git a/assets/js/463e69e4.e390222c.js b/assets/js/463e69e4.e390222c.js new file mode 100644 index 00000000..3278246a --- /dev/null +++ b/assets/js/463e69e4.e390222c.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7278],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>h});var a=n(7294);function r(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var c=a.createContext({}),s=function(e){var t=a.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=s(e.components);return a.createElement(c.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,c=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=s(n),m=r,h=u["".concat(c,".").concat(m)]||u[m]||d[m]||i;return n?a.createElement(h,o(o({ref:t},p),{},{components:n})):a.createElement(h,o({ref:t},p))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=m;var l={};for(var c in t)hasOwnProperty.call(t,c)&&(l[c]=t[c]);l.originalType=e,l[u]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>i,metadata:()=>l,toc:()=>c});var a=n(7462),r=(n(7294),n(3905));const i={title:"Canonical Transcripts"},o=void 0,l={unversionedId:"core-functionality/canonical-transcripts",id:"core-functionality/canonical-transcripts",title:"Canonical 
Transcripts",description:"Overview",source:"@site/docs/core-functionality/canonical-transcripts.md",sourceDirName:"core-functionality",slug:"/core-functionality/canonical-transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/core-functionality/canonical-transcripts.md",tags:[],version:"current",frontMatter:{title:"Canonical Transcripts"},sidebar:"docs",previous:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations"},next:{title:"Transcript Consequence Impact",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"Known Algorithms",id:"known-algorithms",children:[{value:"UCSC",id:"ucsc",children:[],level:3},{value:"Ensembl",id:"ensembl",children:[],level:3},{value:"ACMG",id:"acmg",children:[],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3}],level:2},{value:"Unified Approach",id:"unified-approach",children:[],level:2}],s={toc:c},p="wrapper";function u(e){let{components:t,...i}=e;return(0,r.kt)(p,(0,a.Z)({},s,i,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"One of the more polarizing topics within annotation is the notion of canonical transcripts. Because of alternative splicing, we often have several transcripts for each gene. In the human genome, there are an average of 3.4 transcripts per gene (Tung, 2020). 
As scientists, we seem to have a need for identifying a representative example of a gene - even if there's no biological basis for the motivation."),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(5714).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Golden Helix Blog")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"A few years ago, the guys over at Golden Helix wrote an excellent post about the pitfalls and issues surrounding the identification of canonical transcripts: ",(0,r.kt)("a",{parentName:"p",href:"https://blog.goldenhelix.com/whats-in-a-name-the-intricacies-of-identifying-variants/"},"What\u2019s in a Name: The Intricacies of Identifying Variants"),"."))),(0,r.kt)("p",null,"In Illumina Connected Annotations, we wanted to identify an algorithm for determining the canonical transcript and apply it consistently to all of our transcript data sources."),(0,r.kt)("h2",{id:"known-algorithms"},"Known Algorithms"),(0,r.kt)("h3",{id:"ucsc"},"UCSC"),(0,r.kt)("p",null,"UCSC publishes a list of canonical transcripts in its ",(0,r.kt)("inlineCode",{parentName:"p"},"knownCanonical")," table which is available via the ",(0,r.kt)("a",{parentName:"p",href:"https://genome.ucsc.edu/cgi-bin/hgTables"},"TableBrowser"),". 
Of the RefSeq data sources, it was the only one we could find that provided canonical transcripts:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is defined as either the longest CDS, if the gene has translated transcripts, or the longest cDNA.")),(0,r.kt)("p",null,"If you were to implement this and compare it with the knownCanonical table, you would see a lot of exceptions to the rule."),(0,r.kt)("h3",{id:"ensembl"},"Ensembl"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"http://uswest.ensembl.org/Help/Glossary"},"Ensembl glossary")," states:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is used in the gene tree analysis in Ensembl and does not necessarily reflect the most biologically relevant transcript of a gene. For human, the canonical transcript for a gene is set according to the following hierarchy:"),(0,r.kt)("ol",{parentName:"blockquote"},(0,r.kt)("li",{parentName:"ol"},"Longest CCDS translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (1), choose the longest Ensembl/Havana merged translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (2), choose the longest translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no translation, choose the longest non-protein-coding transcript."))),(0,r.kt)("h3",{id:"acmg"},"ACMG"),(0,r.kt)("p",null,"From the ACMG Guidelines for the Interpretation of Sequence Variants:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"A reference transcript for each gene should be used and provided in the report when describing coding variants. 
The transcript should represent either the longest known transcript and/or the most clinically relevant transcript.")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)("p",null,"From the ClinVar paper:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"When there are multiple transcripts for a gene, ClinVar selects one HGVS expression to construct a preferred name. By default, this selection is based on the first reference standard transcript identified by the RefSeqGene/LRG (Locus Reference Genomic) collaboration.")),(0,r.kt)("h2",{id:"unified-approach"},"Unified Approach"),(0,r.kt)("p",null,"Our approach is almost identical to the one Golden Helix discussed in their article:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"If we're looking at RefSeq, only consider NM & NR transcripts as candidates for canonical transcripts."),(0,r.kt)("li",{parentName:"ol"},"Sort the transcripts in the following order:",(0,r.kt)("ol",{parentName:"li"},(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://www.lrg-sequence.org/"},"Locus Reference Genomic (LRG)")," entries occur before non-LRG entries"),(0,r.kt)("li",{parentName:"ol"},"Descending CDS length"),(0,r.kt)("li",{parentName:"ol"},"Descending transcript length"),(0,r.kt)("li",{parentName:"ol"},"Ascending accession number"))),(0,r.kt)("li",{parentName:"ol"},"Grab the first entry")))}u.isMDXComponent=!0},5714:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/hk1-transcripts-a5b85474d3b002553687715dbd004907.png"}}]); \ No newline at end of file diff --git a/assets/js/4688c68b.8e19f171.js b/assets/js/4688c68b.8e19f171.js deleted file mode 100644 index 9bc98899..00000000 --- a/assets/js/4688c68b.8e19f171.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[878],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>h});var a=t(67294);function i(e,n,t){return n in 
e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var n=c(e.components);return a.createElement(l.Provider,{value:n},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},u=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,r=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),d=c(t),u=i,h=d["".concat(l,".").concat(u)]||d[u]||m[u]||r;return t?a.createElement(h,o(o({ref:n},p),{},{components:t})):a.createElement(h,o({ref:n},p))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,o=new Array(r);o[0]=u;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[d]="string"==typeof e?e:i,o[1]=s;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>s,toc:()=>l});var a=t(87462),i=(t(67294),t(3905));const r={title:"Gene Fusion Detection"},o=void 0,s={unversionedId:"core-functionality/gene-fusions",id:"version-3.21/core-functionality/gene-fusions",title:"Gene Fusion 
Detection",description:"Overview",source:"@site/versioned_docs/version-3.21/core-functionality/gene-fusions.md",sourceDirName:"core-functionality",slug:"/core-functionality/gene-fusions",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/gene-fusions",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/core-functionality/gene-fusions.md",tags:[],version:"3.21",frontMatter:{title:"Gene Fusion Detection"},sidebar:"docs",previous:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/canonical-transcripts"},next:{title:"MNV Recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/mnv-recomposition"}},l=[{value:"Overview",id:"overview",children:[],level:2},{value:"Approach",id:"approach",children:[{value:"Variant Types",id:"variant-types",children:[],level:3},{value:"Criteria",id:"criteria",children:[],level:3}],level:2},{value:"ETV6/RUNX1 Example",id:"etv6runx1-example",children:[{value:"VCF",id:"vcf",children:[],level:3},{value:"JSON Output",id:"json-output",children:[{value:"Gene Fusion Data Sources",id:"gene-fusion-data-sources",children:[],level:4},{value:"Consequences",id:"consequences",children:[],level:4},{value:"Gene Fusions Section",id:"gene-fusions-section",children:[],level:4}],level:3}],level:2}],c={toc:l},p="wrapper";function d(e){let{components:n,...r}=e;return(0,i.kt)(p,(0,a.Z)({},c,r,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. 
When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed."),(0,i.kt)("p",null,"Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana."),(0,i.kt)("p",null,"The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(88138).Z})),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. ",(0,i.kt)("a",{parentName:"p",href:"https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-015-0252-1"},"Landscape of gene fusions in epithelial cancers: seq and ye shall find"),". Genome Med 7, 129 (2015)"))),(0,i.kt)("h2",{id:"approach"},"Approach"),(0,i.kt)("p",null,"Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. Let's consider two transcripts, ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_014206.3")," (",(0,i.kt)("strong",{parentName:"p"},"TMEM258"),") and ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_013402.4")," (",(0,i.kt)("strong",{parentName:"p"},"FADS1"),"). Both of these genes are on the reverse strand in the genome. 
The vertical bar indicates the breakpoint where these transcripts are fused:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 transcripts",src:t(62801).Z})),(0,i.kt)("p",null,"The above explains where the transcripts are fused together, but it doesn't explain in which orientation. By using the directionality encoded in the translocation breakend, we can rearrange these two transcripts in four ways:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 gene fusions",src:t(69879).Z})),(0,i.kt)("p",null,"Only two of the combinations yields a fusion containing both the transcription start site (TSS) and the stop codon. In one case, we can even detect an in-frame gene fusion.\nIf only unidirectional gene fusions are desired, only these two fusions can be detected. If ",(0,i.kt)("inlineCode",{parentName:"p"},"enable-bidirectional-fusions")," is enabled, all four cases can be identified."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Interpreting translocation breakends")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"At first glance, translocation breakends are a bit daunting. However, once you understand how they work, they're actually quite simple. 
For more information, we recommend reading section 5.4 in the ",(0,i.kt)("a",{parentName:"p",href:"https://samtools.github.io/hts-specs/VCFv4.2.pdf"},"VCF 4.2 specification"),"."),(0,i.kt)("table",{parentName:"div"},(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"REF"),(0,i.kt)("th",{parentName:"tr",align:"left"},"ALT"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Meaning"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t[p["),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the right of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t]p]"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending left of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"]p]t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the left of p is joined before t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"[p[t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending right of p is joined before t")))))),(0,i.kt)("h3",{id:"variant-types"},"Variant Types"),(0,i.kt)("p",null,"Specifically we can identify gene fusions from the following structural variant types:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"deletions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"tandem_duplications (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"inversions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"translocation breakpoints 
(",(0,i.kt)("inlineCode",{parentName:"li"},"AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911["),") ")),(0,i.kt)("h3",{id:"criteria"},"Criteria"),(0,i.kt)("p",null,"The following criteria must be met for Nirvana to identify a gene fusion:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"After accounting for gene orientation and genomic rearrangements, both transcripts must have the same orientation if ",(0,i.kt)("inlineCode",{parentName:"li"},"enable-bidirectional-fusions")," is not enabled. They can have the same or different orientations if ",(0,i.kt)("inlineCode",{parentName:"li"},"enable-bidirectional-fusions")," is set."),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must belong to different genes"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts cannot have a coding region that already overlaps without the variant (i.e. in cases where two genes naturally overlap, we don't want to call a gene fusion)")),(0,i.kt)("h2",{id:"etv6runx1-example"},"ETV6/RUNX1 Example"),(0,i.kt)("p",null,"ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). 
Patients with this translocation are associated with a good prognosis and excellent response to treatment."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sun C., Chang L., Zhu X. ",(0,i.kt)("a",{parentName:"p",href:"https://www.oncotarget.com/article/16367/text/"},"Pathogenesis of ETV6/RUNX1-positive childhood acute lymphoblastic leukemia and mechanisms underlying its relapse"),". Oncotarget. 2017; 8: 35445-35459"))),(0,i.kt)("h3",{id:"vcf"},"VCF"),(0,i.kt)("p",null,"Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\nchr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND\nchr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND\nchr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND\nchr21 36420865 . C [chr12:12026270[C . 
PASS SVTYPE=BND\n")),(0,i.kt)("p",null,"When you put these calls together, the resulting genomic rearrangement looks something like this:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(99801).Z})),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)("p",null,"The annotation for the first variant in the VCF looks like this:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{139,141-205,218,220-230}","{139,141-205,218,220-230}":!0},'{\n "chromosome": "chr12",\n "position": 12026270,\n "refAllele": "C",\n "altAlleles": [\n "[chr21:36420865[C"\n ],\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "12p13.2",\n "clingen": [\n {\n "chromosome": "12",\n "begin": 173786,\n "end": 34835837,\n "variantType": "copy_number_gain",\n "id": "nsv995956",\n "clinicalInterpretation": "pathogenic",\n "phenotypes": [\n "Decreased calvarial ossification",\n "Delayed gross motor development",\n "Feeding difficulties",\n "Frontal bossing",\n "Morphological abnormality of the central nervous system",\n "Patchy alopecia"\n ],\n "phenotypeIds": [\n "HP:0002007",\n "HP:0002011",\n "HP:0002194",\n "HP:0002232",\n "HP:0005474",\n "HP:0011968",\n "MedGen:C0232466",\n "MedGen:C1862862",\n "MedGen:CN001816",\n "MedGen:CN001820",\n "MedGen:CN001989",\n "MedGen:CN004852"\n ],\n "observedGains": 1,\n "validated": true\n }\n ],\n "variants": [\n {\n "vid": "12-12026270-C-[chr21:36420865[C",\n "chromosome": "chr12",\n "begin": 12026270,\n "end": 12026270,\n "isStructuralVariant": true,\n "refAllele": "C",\n "altAllele": "[chr21:36420865[C",\n "variantType": "translocation_breakend",\n "cosmicGeneFusions": [\n {\n "id": "COSF2245",\n "numSamples": 249,\n "geneSymbols": [\n "ETV6",\n "RUNX1"\n ],\n "hgvsr": "ENST00000396373.4(ETV6):r.1_1283::ENST00000300305.3(RUNX1):r.504_6222",\n "histologies": [\n {\n "name": "acute lymphoblastic B cell leukaemia",\n "numSamples": 169\n },\n {\n "name": "acute lymphoblastic leukaemia",\n "numSamples": 80\n }\n ],\n "sites": 
[\n {\n "name": "haematopoietic and lymphoid tissue",\n "numSamples": 249\n }\n ],\n "pubMedIds": [\n 7761424,\n 7780150,\n 8609706,\n 8751464,\n 8982044,\n 9067587,\n 9207408,\n 9226156,\n 9628428,\n 10463610,\n 10774753,\n 11091202,\n 12621238,\n 12661004,\n 12750722,\n 15104290,\n 15642392,\n 24557455,\n 26925663\n ]\n }\n ],\n "fusionCatcher": [\n {\n "genes": {\n "first": {\n "hgnc": "ETV6",\n "isOncogene": true\n },\n "second": {\n "hgnc": "RUNX1",\n "isOncogene": true\n }\n },\n "somaticSources": [\n "DepMap CCLE",\n "Cancer Genome Project",\n "ChimerKB 4.0",\n "ChimerPub 4.0",\n "ChimerSeq 4.0",\n "Known",\n "Mitelman DB",\n "OncoKB",\n "TICdb"\n ]\n }\n ],\n "transcripts": [\n {\n "transcript": "ENST00000396373.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "ENSG00000139083",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "ENST00000437180.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000437180.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000300305.3",\n "bioType": "protein_coding",\n "intron": 1,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000300305.3(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000482318.1",\n "bioType": "nonsense_mediated_decay",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000482318.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000486278.2",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000486278.2(RUNX1):r.?_-15+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n 
"directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000455571.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000455571.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000475045.2",\n "bioType": "protein_coding",\n "intron": 11,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000475045.2(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000416754.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000416754.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n }\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000379658.3"\n },\n {\n "transcript": "NM_001987.4",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "2120",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n }\n ],\n "isCanonical": true,\n "proteinId": "NP_001978.1"\n }\n ]\n }\n ]\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript 
ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,i.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"exon that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"intron that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. 
MSH6")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA nomenclature")))),(0,i.kt)("h4",{id:"gene-fusion-data-sources"},"Gene Fusion Data Sources"),(0,i.kt)("p",null,"To provide more context to our gene fusions, we provide the following gene fusion data sources:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/cosmic"},"COSMIC")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/fusioncatcher"},"FusionCatcher"))),(0,i.kt)("h4",{id:"consequences"},"Consequences"),(0,i.kt)("p",null,"When a gene fusion is identified, we add the following Sequence Ontology consequence:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{3}","{3}":!0},' "consequence": [\n "transcript_variant",\n "gene_fusion"\n ],\n')),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"If both transcripts have the same orientation, we label it as ",(0,i.kt)("inlineCode",{parentName:"li"},"unidirectional_gene_fusion"),", if they have different orientations, we label it as ",(0,i.kt)("inlineCode",{parentName:"li"},"bidirectional_gene_fusion")),(0,i.kt)("li",{parentName:"ul"},"If both unidirectional and bidirectional ones are detected, we label it as ",(0,i.kt)("inlineCode",{parentName:"li"},"gene_fusion"),".")),(0,i.kt)("h4",{id:"gene-fusions-section"},"Gene Fusions Section"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"geneFusions")," section is contained within the object of the originating transcript. It will contain all the pairwise gene fusions that obey the criteria outline above. In the case of ",(0,i.kt)("inlineCode",{parentName:"p"},"ENST00000396373.4"),", there 7 other Ensembl transcripts that would produce a gene fusion. 
For ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4"),", there was only one transcript (",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4"),") that produce a gene fusion."),(0,i.kt)("p",null,"For each originating transcript, we report the following for each partner transcript:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"transcript ID"),(0,i.kt)("li",{parentName:"ul"},"gene ID"),(0,i.kt)("li",{parentName:"ul"},"HGNC gene symbol"),(0,i.kt)("li",{parentName:"ul"},"transcript bio type (e.g. protein_coding)"),(0,i.kt)("li",{parentName:"ul"},"intron or exon number containing the breakpoint"),(0,i.kt)("li",{parentName:"ul"},"HGVS RNA notation"),(0,i.kt)("li",{parentName:"ul"},"gene fusion directionality")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Before Nirvana 3.15, we provided HGVS coding notation. However, HGVS r. 
notation is more appropriate for these types fusion splicing events (see ",(0,i.kt)("a",{parentName:"p",href:"https://varnomen.hgvs.org/bg-material/consultation/svd-wg007"},"HGVS SVD-WG007"),")."))),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{8}","{8}":!0},' "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n }\n ],\n')),(0,i.kt)("p",null,"The HGVS RNA notation above indicates that the gene fusion starts with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4")," (RUNX1) until CDS position 58 and continues with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4")," (ETV6). ",(0,i.kt)("inlineCode",{parentName:"p"},"1009+3367")," indicates that the fusion occurred 3367 bp within intron 2."))}d.isMDXComponent=!0},69879:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_GeneFusions-e5e3758ea9d2c07d3591e3801b2bf7e3.svg"},62801:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_Transcripts-fe1b9c6be1f7cbfefbce887f8cec5d58.svg"},99801:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/etv6-runx1-fusion-ec8f4312c9aca496bde0d6e2b1bbd50d.svg"},88138:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/gene-fusions-fig2-1cce8ac31b00465c8d36bdc47ec3309e.svg"}}]); \ No newline at end of file diff --git a/assets/js/46de40ad.be900e64.js b/assets/js/46de40ad.be900e64.js deleted file mode 100644 index 0bde9dd0..00000000 --- a/assets/js/46de40ad.be900e64.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3082],{3905:(e,n,t)=>{t.d(n,{Zo:()=>d,kt:()=>u});var a=t(67294);function l(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function i(e,n){var 
t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function r(e){for(var n=1;n=0||(l[t]=e[t]);return l}(e,n);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(l[t]=e[t])}return l}var p=a.createContext({}),m=function(e){var n=a.useContext(p),t=n;return e&&(t="function"==typeof e?e(n):r(r({},n),e)),t},d=function(e){var n=m(e.components);return a.createElement(p.Provider,{value:n},e.children)},s="mdxType",c={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},v=a.forwardRef((function(e,n){var t=e.components,l=e.mdxType,i=e.originalType,p=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),s=m(t),v=l,u=s["".concat(p,".").concat(v)]||s[v]||c[v]||i;return t?a.createElement(u,r(r({ref:n},d),{},{components:t})):a.createElement(u,r({ref:n},d))}));function u(e,n){var t=arguments,l=n&&n.mdxType;if("string"==typeof e||l){var i=t.length,r=new Array(i);r[0]=v;var o={};for(var p in n)hasOwnProperty.call(n,p)&&(o[p]=n[p]);o.originalType=e,o[s]="string"==typeof e?e:l,r[1]=o;for(var m=2;m{t.r(n),t.d(n,{contentTitle:()=>r,default:()=>s,frontMatter:()=>i,metadata:()=>o,toc:()=>p});var a=t(87462),l=(t(67294),t(3905));const i={title:"Variant IDs"},r=void 0,o={unversionedId:"core-functionality/variant-ids",id:"version-3.2.5/core-functionality/variant-ids",title:"Variant 
IDs",description:"Overview",source:"@site/versioned_docs/version-3.2.5/core-functionality/variant-ids.md",sourceDirName:"core-functionality",slug:"/core-functionality/variant-ids",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/core-functionality/variant-ids",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/core-functionality/variant-ids.md",tags:[],version:"3.2.5",frontMatter:{title:"Variant IDs"},sidebar:"version-3.2.5/docs",previous:{title:"Nirvana JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/file-formats/nirvana-json-file-format"},next:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/core-functionality/gene-fusions"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"SNV",id:"snv",children:[{value:"VCF Example",id:"vcf-example",children:[],level:4},{value:"Format",id:"format",children:[],level:4},{value:"VID Example",id:"vid-example",children:[],level:4}],level:2},{value:"Insertion",id:"insertion",children:[{value:"VCF Example",id:"vcf-example-1",children:[],level:4},{value:"Format",id:"format-1",children:[],level:4},{value:"VID Example",id:"vid-example-1",children:[],level:4}],level:2},{value:"Deletion",id:"deletion",children:[{value:"VCF Example",id:"vcf-example-2",children:[],level:4},{value:"Format",id:"format-2",children:[],level:4},{value:"VID Example",id:"vid-example-2",children:[],level:4}],level:2},{value:"Delins",id:"delins",children:[{value:"VCF Example",id:"vcf-example-3",children:[],level:4},{value:"Format",id:"format-3",children:[],level:4},{value:"VID Example",id:"vid-example-3",children:[],level:4}],level:2},{value:"MNV",id:"mnv",children:[{value:"VCF Example",id:"vcf-example-4",children:[],level:4},{value:"Format",id:"format-4",children:[],level:4},{value:"VID Example",id:"vid-example-4",children:[],level:4}],level:2},{value:"CNV",id:"cnv",children:[{value:"VCF 
Example",id:"vcf-example-5",children:[],level:4},{value:"Format",id:"format-5",children:[],level:4},{value:"VID Example",id:"vid-example-5",children:[],level:4}],level:2},{value:"Inversion (SV)",id:"inversion-sv",children:[{value:"VCF Example",id:"vcf-example-6",children:[],level:4},{value:"Format",id:"format-6",children:[],level:4},{value:"VID Example",id:"vid-example-6",children:[],level:4}],level:2},{value:"Translocation (SV)",id:"translocation-sv",children:[{value:"VCF Example",id:"vcf-example-7",children:[],level:4},{value:"Format",id:"format-7",children:[],level:4},{value:"VID Example",id:"vid-example-7",children:[],level:4}],level:2},{value:"Deletion (SV)",id:"deletion-sv",children:[{value:"VCF Example",id:"vcf-example-8",children:[],level:4},{value:"Format",id:"format-8",children:[],level:4},{value:"VID Example",id:"vid-example-8",children:[],level:4}],level:2},{value:"Insertion (SV)",id:"insertion-sv",children:[{value:"VCF Example",id:"vcf-example-9",children:[],level:4},{value:"Format",id:"format-9",children:[],level:4},{value:"VID Example",id:"vid-example-9",children:[],level:4}],level:2},{value:"Tandem Duplication (SV)",id:"tandem-duplication-sv",children:[{value:"VCF Example",id:"vcf-example-10",children:[],level:4},{value:"Format",id:"format-10",children:[],level:4},{value:"VID Example",id:"vid-example-10",children:[],level:4}],level:2}],m={toc:p},d="wrapper";function s(e){let{components:n,...t}=e;return(0,l.kt)(d,(0,a.Z)({},m,t,{components:n,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"Many downstream tools use a variant identifier to store annotation results. 
"),(0,l.kt)("div",{className:"admonition admonition-warning alert alert--danger"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M5.05.31c.81 2.17.41 3.38-.52 4.31C3.55 5.67 1.98 6.45.9 7.98c-1.45 2.05-1.7 6.53 3.53 7.7-2.2-1.16-2.67-4.52-.3-6.61-.61 2.03.53 3.33 1.94 2.86 1.39-.47 2.3.53 2.27 1.67-.02.78-.31 1.44-1.13 1.81 3.42-.59 4.78-3.42 4.78-5.56 0-2.84-2.53-3.22-1.25-5.61-1.52.13-2.03 1.13-1.89 2.75.09 1.08-1.02 1.8-1.86 1.33-.67-.41-.66-1.19-.06-1.78C8.18 5.31 8.68 2.45 5.05.32L5.03.3l.02.01z"}))),"Deprecated")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"This initial variant ID (VID) scheme was designed to be parsimonious and was not meant to be used to reconstitute the original VCF variant. In later versions of Nirvana, we migrated to the identifier scheme used at the Broad Institute (with some extensions to handle structural variants)."))),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Conventions")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"all chromosomes use Ensembl style notation (i.e. 
22 instead of chr22)"),(0,l.kt)("li",{parentName:"ul"},"for a reference variant (i.e. no alt allele), replace the period (.) with the reference base"),(0,l.kt)("li",{parentName:"ul"},"padding bases are used, neither the reference nor alternate allele can be empty"),(0,l.kt)("li",{parentName:"ul"},"some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base")))),(0,l.kt)("h2",{id:"snv"},"SNV"),(0,l.kt)("h4",{id:"vcf-example"},"VCF Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 69224 . A C . . .\n")),(0,l.kt)("h4",{id:"format"},"Format"),(0,l.kt)("p",null,(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),":",(0,l.kt)("inlineCode",{parentName:"p"},"position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,l.kt)("h4",{id:"vid-example"},"VID Example"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:69224:C"))),(0,l.kt)("h2",{id:"insertion"},"Insertion"),(0,l.kt)("h4",{id:"vcf-example-1"},"VCF Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 69567 . A AT . . 
.\n")),(0,l.kt)("h4",{id:"format-1"},"Format"),(0,l.kt)("p",null,(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),":",(0,l.kt)("inlineCode",{parentName:"p"},"position after insertion"),":",(0,l.kt)("inlineCode",{parentName:"p"},"position before insertion"),":",(0,l.kt)("inlineCode",{parentName:"p"},"alternate allele OR MD5 hash")),(0,l.kt)("p",null,"If more than 32 bases are being inserted, the VID scheme uses an MD5 checksum instead"),(0,l.kt)("h4",{id:"vid-example-1"},"VID Example"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:69568:69567:T")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:69568:69567:B9ECE18C950AFBFA6B0FDBFA4FF731D3"))),(0,l.kt)("h2",{id:"deletion"},"Deletion"),(0,l.kt)("h4",{id:"vcf-example-2"},"VCF Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 136647 . GG G . . .\n")),(0,l.kt)("h4",{id:"format-2"},"Format"),(0,l.kt)("p",null,(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),":",(0,l.kt)("inlineCode",{parentName:"p"},"start position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"end position")),(0,l.kt)("h4",{id:"vid-example-2"},"VID Example"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:136645:136645"))),(0,l.kt)("h2",{id:"delins"},"Delins"),(0,l.kt)("h4",{id:"vcf-example-3"},"VCF Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 965025 . GCAGTGCATGGTGCTGTGAGATCAGCATGTGTG GTGCAGTGCATGGTGCTGTGAGATCAGCA . . 
.\n")),(0,l.kt)("h4",{id:"format-3"},"Format"),(0,l.kt)("p",null,(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),":",(0,l.kt)("inlineCode",{parentName:"p"},"start position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"end position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"inserted bases")),(0,l.kt)("p",null,"If more than 32 bases are being inserted, the VID scheme uses an MD5 checksum instead"),(0,l.kt)("h4",{id:"vid-example-3"},"VID Example"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:965026:965057:TGCAGTGCATGGTGCTGTGAGATCAGCA")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:965026:965057:5DC27E17BE0B0F184325DC8654E34F1F"))),(0,l.kt)("h2",{id:"mnv"},"MNV"),(0,l.kt)("h4",{id:"vcf-example-4"},"VCF Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 979210 . TGG TTT . . .\n")),(0,l.kt)("h4",{id:"format-4"},"Format"),(0,l.kt)("p",null,(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),":",(0,l.kt)("inlineCode",{parentName:"p"},"start position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"end position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,l.kt)("p",null,"If more than 32 bases are being inserted, the VID scheme uses an MD5 checksum instead"),(0,l.kt)("h4",{id:"vid-example-4"},"VID Example"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:979211:979212:TT")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:979211:979212:DF1F3EDB9115ACB0A1E04209B7A9937B"))),(0,l.kt)("h2",{id:"cnv"},"CNV"),(0,l.kt)("h4",{id:"vcf-example-5"},"VCF Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 854895 . N , . PASS SVTYPE=CNV;END=861879;CNVLEN=6984;CIPOS=-291,291;CIEND=-291,291 GT:RC:BC:CN:MCC:MCCQ:QS:FT:DQ 1/2:165.40:12:3:3:16.80:16.71:PASS:.\nchr1 814866 . 
N 4 q10;CLT10kb SVTYPE=CNV;END=824517 RC:BC:CN 214:7:4\n")),(0,l.kt)("h4",{id:"format-5"},"Format"),(0,l.kt)("p",null,(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),":",(0,l.kt)("inlineCode",{parentName:"p"},"start position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"end position"),":",(0,l.kt)("inlineCode",{parentName:"p"},'copy number or "CNV"')),(0,l.kt)("h4",{id:"vid-example-5"},"VID Example"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:854896:861879:3")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:814867:824517:CNV"))),(0,l.kt)("h2",{id:"inversion-sv"},"Inversion (SV)"),(0,l.kt)("h4",{id:"vcf-example-6"},"VCF Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 17051724 . C 3070 MaxDepth END=234912187;SVTYPE=INV;SVLEN=217860463 GT:GQ:PR:SR 0/1:3070:77,69:84,76\n")),(0,l.kt)("h4",{id:"format-6"},"Format"),(0,l.kt)("p",null,(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),":",(0,l.kt)("inlineCode",{parentName:"p"},"start position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"end position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"Inverse")),(0,l.kt)("h4",{id:"vid-example-6"},"VID Example"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:17051725:234912187:Inverse"))),(0,l.kt)("h2",{id:"translocation-sv"},"Translocation (SV)"),(0,l.kt)("h4",{id:"vcf-example-7"},"VCF Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 797265 . 
G G]chr8:245687] 55 PASS SVTYPE=BND;CIPOS=0,31 GT:GQ:PR:SR 0/1:55:39,6:20,3\n")),(0,l.kt)("h4",{id:"format-7"},"Format"),(0,l.kt)("p",null,(0,l.kt)("inlineCode",{parentName:"p"},"chromosome 1"),":",(0,l.kt)("inlineCode",{parentName:"p"},"breakpoint 1"),":",(0,l.kt)("inlineCode",{parentName:"p"},"orientation 1"),":",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome 2"),":",(0,l.kt)("inlineCode",{parentName:"p"},"breakpoint 2"),":",(0,l.kt)("inlineCode",{parentName:"p"},"orientation 2")),(0,l.kt)("h4",{id:"vid-example-7"},"VID Example"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:797265:+:8:245687:-"))),(0,l.kt)("h2",{id:"deletion-sv"},"Deletion (SV)"),(0,l.kt)("h4",{id:"vcf-example-8"},"VCF Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 2053194 . G 38 PASS END=2055480;SVTYPE=DEL;SVLEN=-2286;IMPRECISE;CIPOS=-143,144;CIEND=-102,102 GT:GQ:PR 0/1:38:3,5\n")),(0,l.kt)("h4",{id:"format-8"},"Format"),(0,l.kt)("p",null,(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),":",(0,l.kt)("inlineCode",{parentName:"p"},"start position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"end position")),(0,l.kt)("h4",{id:"vid-example-8"},"VID Example"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:2053195:2055480"))),(0,l.kt)("h2",{id:"insertion-sv"},"Insertion (SV)"),(0,l.kt)("h4",{id:"vcf-example-9"},"VCF Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 1925144 . 
G 1439 PASS END=1925144;SVTYPE=INS;CIPOS=0,14;CIEND=0,14 GT:GQ:PR:SR 1/1:72:2,7:0,33\n")),(0,l.kt)("h4",{id:"format-9"},"Format"),(0,l.kt)("p",null,(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),":",(0,l.kt)("inlineCode",{parentName:"p"},"start position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"end position"),":INS"),(0,l.kt)("h4",{id:"vid-example-9"},"VID Example"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:1925145:1925144:INS"))),(0,l.kt)("h2",{id:"tandem-duplication-sv"},"Tandem Duplication (SV)"),(0,l.kt)("h4",{id:"vcf-example-10"},"VCF Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 2454149 . G 976 MaxDepth END=2454244;SVTYPE=DUP;SVLEN=95;CIPOS=0,10;CIEND=0,10 GT:GQ:PR:SR 0/1:976:6,0:80,52\n")),(0,l.kt)("h4",{id:"format-10"},"Format"),(0,l.kt)("p",null,(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),":",(0,l.kt)("inlineCode",{parentName:"p"},"start position"),":",(0,l.kt)("inlineCode",{parentName:"p"},"end position"),":TDUP"),(0,l.kt)("h4",{id:"vid-example-10"},"VID Example"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"1:2454150:2454244:TDUP"))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/48476061.fbe2e48b.js b/assets/js/48476061.fbe2e48b.js deleted file mode 100644 index 7035214b..00000000 --- a/assets/js/48476061.fbe2e48b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8762],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function 
o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),u=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=u(e.components);return r.createElement(p.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},s=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,l=e.originalType,p=e.parentName,c=i(e,["components","mdxType","originalType","parentName"]),d=u(n),s=a,f=d["".concat(p,".").concat(s)]||d[s]||m[s]||l;return n?r.createElement(f,o(o({ref:t},c),{},{components:n})):r.createElement(f,o({ref:t},c))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[d]="string"==typeof e?e:a,o[1]=i;for(var u=2;u{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/topmed-json",id:"version-3.21/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/topmed-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],u={toc:p},c="wrapper";function 
d(e){let{components:t,...n}=e;return(0,a.kt)(c,(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n "failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allHc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/48830c0f.d8f06f8b.js b/assets/js/48830c0f.d8f06f8b.js deleted file mode 100644 index 79b83255..00000000 --- a/assets/js/48830c0f.d8f06f8b.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[42,6923],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},p=function(e){var t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=m(n),h=i,u=d["".concat(s,".").concat(h)]||d[h]||c[h]||o;return n?a.createElement(u,r(r({ref:t},p),{},{components:n})):a.createElement(u,r({ref:t},p))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var o=n.length,r=new Array(o);r[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:i,r[1]=l;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>d,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const o={},r=void 0,l={unversionedId:"data-sources/omim-json",id:"version-3.18/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.18/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/omim-json.md",tags:[],version:"3.18",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],m={toc:s},p="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,i.kt)("h4",{id:"phenotype"},"Phenotype"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,i.kt)("h4",{id:"mapping"},"Mapping"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,i.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,i.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,i.kt)("h4",{id:"inheritance"},"Inheritance"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,i.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,i.kt)("h4",{id:"comments"},"Comments"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,i.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,i.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}d.isMDXComponent=!0},7071:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>c,frontMatter:()=>r,metadata:()=>s,toc:()=>m});var a=n(87462),i=(n(67294),n(3905)),o=n(43056);const r={title:"OMIM"},l=void 
0,s={unversionedId:"data-sources/omim",id:"version-3.18/data-sources/omim",title:"OMIM",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/omim.mdx",sourceDirName:"data-sources",slug:"/data-sources/omim",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/omim",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/omim.mdx",tags:[],version:"3.18",frontMatter:{title:"OMIM"},sidebar:"docs",previous:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mitomap"},next:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/phylop"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Parse OMIM data",id:"parse-omim-data",children:[{value:"mim2gene.txt",id:"mim2genetxt",children:[],level:3},{value:"OMIM API",id:"omim-api",children:[{value:"Mapping key to content",id:"mapping-key-to-content",children:[],level:4},{value:"Phenotype character to comment",id:"phenotype-character-to-comment",children:[],level:4}],level:3},{value:"Remove links in OMIM descriptions",id:"remove-links-in-omim-descriptions",children:[],level:3}],level:2},{value:"JSON output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[],level:2}],p={toc:m},d="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily."),(0,i.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publications")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/30445645/"},"30445645"),"."),(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM\xae), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/25428349/"},"25428349"),"."))),(0,i.kt)("h2",{id:"parse-omim-data"},"Parse OMIM data"),(0,i.kt)("p",null,"Nirvana uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Nirvana. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to a Nirvana gene symbol are further processed. 
The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols."),(0,i.kt)("h3",{id:"mim2genetxt"},"mim2gene.txt"),(0,i.kt)("p",null,"This mim2gene.txt (",(0,i.kt)("a",{parentName:"p",href:"http://omim.org/static/omim/data/mim2gene.txt"},"http://omim.org/static/omim/data/mim2gene.txt"),") file provides the mapping between MIM numbers and gene symbols. An example of this file is given below:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"# MIM Number MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq) Entrez Gene ID (NCBI) Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)\n100050 predominantly phenotypes\n100070 phenotype 100329167\n100100 phenotype\n100200 predominantly phenotypes\n100300 phenotype\n100500 moved/removed\n100600 phenotype\n100640 gene 216 ALDH1A1 ENSG00000165092\n100650 gene/phenotype 217 ALDH2 ENSG00000111275\n100660 gene 218 ALDH3A1 ENSG00000108602\n100670 gene 219 ALDH1B1 ENSG00000137124\n100675 predominantly phenotypes\n100678 gene 39 ACAT2 ENSG00000120437\n")),(0,i.kt)("p",null,'The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns are used to find the proper gene symbol supported by Nirvana, which may or may not be the same as the gene symbol listed here.'),(0,i.kt)("h3",{id:"omim-api"},"OMIM API"),(0,i.kt)("p",null,"Nirvana retrieves the OMIM annotations from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.omim.org/api"},"OMIM API"),' JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. 
A sample JSON response from the API is provided there.'),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "omim": {\n "version": "1.0",\n "entryList": [\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 100640,\n "status": "live",\n "titles": {\n "preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",\n "alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\\nACETALDEHYDE DEHYDROGENASE 1;;\\nALDH, LIVER CYTOSOLIC;;\\nRETINAL DEHYDROGENASE 1; RALDH1"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. 
ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 7709,\n "chromosome": 9,\n "chromosomeSymbol": "9",\n "chromosomeSort": 225,\n "chromosomeLocationStart": 72900670,\n "chromosomeLocationEnd": 72953052,\n "transcript": "ENST00000297785.7",\n "cytoLocation": "9q21",\n "computedCytoLocation": "9q21.13",\n "mimNumber": 100640,\n "geneSymbols": "ALDH1A1",\n "geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",\n "mappingMethod": "REa, A",\n "confidence": "P",\n "mouseGeneSymbol": "Aldh1a1",\n "mouseMgiID": "MGI:1353450",\n "geneInheritance": null\n },\n "externalLinks": {\n "geneIDs": "216",\n "hgncID": "402",\n "ensemblIDs": "ENSG00000165092,ENST00000297785.8",\n "approvedGeneSymbols": "ALDH1A1",\n "ncbiReferenceSequences": "1519246465",\n "proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",\n "uniGenes": "Hs.76392",\n "swissProtIDs": "P00352",\n "decipherGene": false,\n "umlsIDs": "C1412333",\n "gtr": true,\n "cmgGene": false,\n "keggPathways": true,\n "gwasCatalog": false,\n\n }\n }\n },\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 102560,\n "status": "live",\n "titles": {\n "preferredTitle": "ACTIN, GAMMA-1; ACTG1",\n "alternativeTitles": "ACTIN, GAMMA; ACTG;;\\nCYTOSKELETAL GAMMA-ACTIN;;\\nACTIN, CYTOPLASMIC, 2"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. 
Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 13666,\n "chromosome": 17,\n "chromosomeSymbol": "17",\n "chromosomeSort": 947,\n "chromosomeLocationStart": 81509970,\n "chromosomeLocationEnd": 81512798,\n "transcript": "ENST00000331925.7",\n "cytoLocation": "17q25.3",\n "computedCytoLocation": "17q25.3",\n "mimNumber": 102560,\n "geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",\n "geneName": "Actin, gamma-1",\n "mappingMethod": "REa, A, Fd",\n "confidence": "C",\n "mouseGeneSymbol": "Actg1",\n "mouseMgiID": "MGI:87906",\n "geneInheritance": null,\n "phenotypeMapList": [\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Baraitser-Winter syndrome 2",\n "phenotypeMimNumber": 614583,\n "phenotypicSeriesNumber": "PS243310",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n },\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Deafness, autosomal dominant 20/26",\n "phenotypeMimNumber": 604717,\n "phenotypicSeriesNumber": "PS124900",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n }\n ]\n }\n }\n }\n ]\n }\n}\n')),(0,i.kt)("p",null,"Content from the OMIM API JSON response is reorganized as shown in the Nirvana ",(0,i.kt)("a",{parentName:"p",href:"#json-output"},"JSON Output")),(0,i.kt)("p",null,"Mappings between the Nirvana JSON output and OMIM JSON API are listed in the table below:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Nirvana JSON key chain"),(0,i.kt)("th",{parentName:"tr",align:"left"},"OMIM API JSON key 
chain"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:geneName")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (",(0,i.kt)("a",{parentName:"td",href:"#mapping-key-to-content"},"see mapping 
below"),")")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:inheritances"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (",(0,i.kt)("a",{parentName:"td",href:"#phenotype-character-to-comment"},"see mapping below"),")")))),(0,i.kt)("h4",{id:"mapping-key-to-content"},"Mapping key to content"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"1")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"2")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disease phenotype itself was mapped"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"3")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"molecular basis of the disorder is known"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"4")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder is a chromosome deletion or duplication syndrome"),(0,i.kt)("br",null)),(0,i.kt)("h4",{id:"phenotype-character-to-comment"},"Phenotype character to comment"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"?")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"unconfirmed or possibly spurious mapping"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"["),"/",(0,i.kt)("inlineCode",{parentName:"p"},"]")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"nondiseases"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"{"),"/",(0,i.kt)("inlineCode",{parentName:"p"},"}")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"contribute to susceptibility to multifactorial disorders or to susceptibility to 
infection"),(0,i.kt)("br",null)),(0,i.kt)("h3",{id:"remove-links-in-omim-descriptions"},"Remove links in OMIM descriptions"),(0,i.kt)("p",null,"There are different types of link in the OMIM description section. For example, in above JSON response, we have the description of MIM entry 100640:"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).")),(0,i.kt)("p",null,"As the descriptions will be shown as plain text, we remove the curry brackets surrounding links and try to make the text still readable with minimal modifications. Briefly:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},'Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.'),(0,i.kt)("li",{parentName:"ul"},'Links referring to a literature reference will be processed to remove the internal index and curry brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".'),(0,i.kt)("li",{parentName:"ul"},'All the other links will simple have their curry brackets removed. 
For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".'),(0,i.kt)("li",{parentName:"ul"},'If the content within a pair of parentheses becomes empty after being processed, the parentheses need to be removed as well and its surrounding white spaces should be properly processed. For example, "ALDH2 ({100650})," will become "ALDH2,".')),(0,i.kt)("p",null,"Here is a list of examples about how the description section supposed to be processed:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Original text"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Processed text"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"({516030}, {516040}, and {516050})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1, {168461}; D2, {123833}; D3, {123834})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1; D2; D3)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2, {125645})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., see {102700}, {300755})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH). 
See also liver mitochondrial ALDH2")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A; {601011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1; {138359}), mu (e.g., {138350})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1), mu")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB; see {164011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G, {147574})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; {EC 2.7.1.74}; {125450})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; EC 2.7.1.74)")))),(0,i.kt)("h2",{id:"json-output"},"JSON output"),(0,i.kt)(o.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The first step in builing the OMIM ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," files is to use the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"downloadOMIM")," to download the necessary data. In order to download the data the user must possess an API key obtained from OMIM. 
This key has to be set as the environment variable ",(0,i.kt)("em",{parentName:"p"},"OmimApiKey"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"export OmimApiKey=\ndotnet NirvanaBuild/SAUtils.dll downloadOMIM\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll downloadomim [options]\nDownload the OMIM gene annotation data\n\nOPTIONS:\n --uga, -u universal gene archive path\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet NirvanaBuild/SAUtils.dll downloadOMIM --ref References/7/Homo_sapiens.GRCh38.Nirvana.dat --uga Cache/27/UGA.tsv.gz --out ExternalDataSources/OMIM/2021-06-14\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\nUnable to resolve gene symbol conflict for CD300H: Ensembl: [ENSG00000284690]: AC079325.2, Entrez Gene: [100130520]: LOC100130520\nUnable to resolve gene symbol conflict for STRIT1: Ensembl: [ENSG00000240045]: DWORF, Entrez Gene: [100507537]: LOC100507537\nUnable to resolve gene symbol conflict for WAKMAR2: Ensembl: [ENSG00000237499]: AL357060.2, Entrez Gene: [100130476]: LOC100130476\nUnable to resolve gene symbol conflict for PERCC1: Ensembl: [ENSG00000284395]: AL032819.3, Entrez Gene: [105371045]: LOC105371045\nUnable to resolve gene symbol conflict for LASTR: Ensembl: [ENSG00000242147]: AL365356.5, Entrez Gene: [105376382]: LOC105376382\nUnable to resolve gene symbol conflict for PRANCR: Ensembl: [ENSG00000257815]: LINC01481, Entrez Gene: [101928062]: LOC101928062\nUnable to resolve gene symbol 
conflict for THORLNC: Ensembl: [ENSG00000226856]: AC093901.1, Entrez Gene: [100506797]: LOC100506797\nGene Symbol Update Statistics\n============================================\n# of gene symbols already up-to-date: 15,952\n# of gene symbols updated: 330\n# of genes where both IDs are null: 0\n# of gene symbols not in cache: 148\n# of resolved gene symbol conflicts: 15\n# of unresolved gene symbol conflicts: 7\n\nTime: 00:02:38.2\n")),(0,i.kt)("p",null,"Once the download has succeeded, the ",(0,i.kt)("inlineCode",{parentName:"p"},"nga")," files can be produced using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"omim"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet NirvanaBuild/SAUtils.dll omim\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll omim [options]\nCreates a gene annotation database from OMIM data\n\nOPTIONS:\n --m2g, -m MimToGeneSymbol tsv file\n --json, -j OMIM entry json file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\n\ndotnet NirvanaBuild/SAUtils.dll omim --m2g ExternalDataSources/OMIM/2021-06-14/MimToGeneSymbol.tsv --json ExternalDataSources/OMIM/2021-06-14/MimEntries.json.gz --out SupplementaryDatabase/63/\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\n\nTime: 00:00:04.5\n")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/488ebf8f.7fbb1cea.js b/assets/js/488ebf8f.7fbb1cea.js deleted file mode 100644 index c9fd5726..00000000 --- 
a/assets/js/488ebf8f.7fbb1cea.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[156,3130],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>g});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),c=d(n),m=r,g=c["".concat(s,".").concat(m)]||c[m]||u[m]||o;return n?a.createElement(g,i(i({ref:t},p),{},{components:n})):a.createElement(g,i({ref:t},p))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/decipher-json",id:"version-3.18/data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.18/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/decipher-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n }\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed deletions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"total # of samples")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% annotation overlap")))))}c.isMDXComponent=!0},57429:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(22529);const i={title:"DECIPHER"},l=void 0,s={unversionedId:"data-sources/decipher",id:"version-3.18/data-sources/decipher",title:"DECIPHER",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/decipher.mdx",sourceDirName:"data-sources",slug:"/data-sources/decipher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/decipher",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/decipher.mdx",tags:[],version:"3.18",frontMatter:{title:"DECIPHER"},sidebar:"docs",previous:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dbsnp"},next:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/fusioncatcher"}},d=[{value:"Overview",id:"overview",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[{value:"JSON output",id:"json-output",children:[],level:3}],level:2}],p={toc:d},c="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/"},"DECIPHER")," (DatabasE of genomiC varIation and Phenotype in Humans using Ensembl Resources) is an interactive web-based database which incorporates a suite of tools designed to aid the interpretation of genomic variants."),(0,r.kt)("p",null,"DECIPHER enhances clinical diagnosis by retrieving information from a variety of bioinformatics resources relevant to the variant found in the patient. 
The patient's variant is displayed in the context of both normal variation and pathogenic variation reported at that locus thereby facilitating interpretation."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"DECIPHER: Database of Chromosomal Imbalance and Phenotype in Humans using Ensembl Resources. Firth, H.V. et al., 2009. Am.J.Hum.Genet 84, 524-533 (DOI: dx.doi.org/10/1016/j.ajhg.2009.03.010)"))),(0,r.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#population_cnv_id chr start end deletion_observations deletion_frequency deletion_standard_error duplication_observations duplication_frequency duplication_standard_error observations frequency standard_error type sample_size study\n1 1 10529 177368 0 0 1 3 0.075 0.555277708 3 0.075 0.555277708 1 40 42M calls\n2 1 13516 91073 0 0 1 27 0.675 0.109713431 27 0.675 0.109713431 1 40 42M calls\n3 1 18888 35451 0 0 1 2 0.002366864 0.706269473 2 0.002366864 0.706269473 1 845 DDD\n")),(0,r.kt)("h4",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"We parse the DECIPHER tsv file and extract the following 
columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"chr"),(0,r.kt)("li",{parentName:"ul"},"start"),(0,r.kt)("li",{parentName:"ul"},"end"),(0,r.kt)("li",{parentName:"ul"},"deletion_observations"),(0,r.kt)("li",{parentName:"ul"},"deletion_frequency"),(0,r.kt)("li",{parentName:"ul"},"duplication_observations"),(0,r.kt)("li",{parentName:"ul"},"duplication_frequency"),(0,r.kt)("li",{parentName:"ul"},"sample_size")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz"},"https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz"),"\n",(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/files/downloads/population_cnv_grch37.txt.gz"},"https://www.deciphergenomics.org/files/downloads/population_cnv_grch37.txt.gz")),(0,r.kt)("h3",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/49488eae.180a7e0d.js b/assets/js/49488eae.180a7e0d.js deleted file mode 100644 index 7d712abd..00000000 --- a/assets/js/49488eae.180a7e0d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4368],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>k});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),d=function(t){var 
e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=d(t.components);return r.createElement(p.Provider,{value:e},t.children)},m="mdxType",s={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,i=t.originalType,p=t.parentName,c=l(t,["components","mdxType","originalType","parentName"]),m=d(n),u=a,k=m["".concat(p,".").concat(u)]||m[u]||s[u]||i;return n?r.createElement(k,o(o({ref:e},c),{},{components:n})):r.createElement(k,o({ref:e},c))}));function k(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[m]="string"==typeof t?t:a,o[1]=l;for(var d=2;d{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const i={title:"Dependencies"},o=void 0,l={unversionedId:"introduction/dependencies",id:"version-3.17/introduction/dependencies",title:"Dependencies",description:"All of the following dependencies have been included in this repository.",source:"@site/versioned_docs/version-3.17/introduction/dependencies.md",sourceDirName:"introduction",slug:"/introduction/dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/dependencies",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/introduction/dependencies.md",tags:[],version:"3.17",frontMatter:{title:"Dependencies"},sidebar:"version-3.17/docs",previous:{title:"Introduction",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/"},next:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/getting-started"}},p=[],d={toc:p},c="wrapper";function 
m(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},d,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("p",null,"All of the following dependencies have been included in this repository."),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Name"),(0,a.kt)("th",{parentName:"tr",align:"center"},"License"),(0,a.kt)("th",{parentName:"tr",align:null},"Usage"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-extensions-for-dotnet-cli"},"Amazon.Lambda")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS extensions for .NET CLI")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-sdk-net/"},"AWSSDK")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS Lambda, S3, SNS support")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://www.newtonsoft.com/json"},"Json.NET")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"JASIX utility")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/ebiggers/libdeflate"},"libdeflate")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/moq/moq4"},"Moq")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"Mocking framework for unit 
tests")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"http://www.ndesk.org/Options"},"NDesk.Options")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT/X11"),(0,a.kt)("td",{parentName:"tr",align:null},"CommandLine library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/xunit/xunit"},"xUnit")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"Unit testing framework")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/Dead2/zlib-ng"},"zlib-ng")),(0,a.kt)("td",{parentName:"tr",align:"center"},"zlib"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/facebook/zstd"},"zstd")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5ad3cfa1.63d986aa.js b/assets/js/494b7fcc.1203f844.js similarity index 64% rename from assets/js/5ad3cfa1.63d986aa.js rename to assets/js/494b7fcc.1203f844.js index 1e5279f2..80bfb1fe 100644 --- a/assets/js/5ad3cfa1.63d986aa.js +++ b/assets/js/494b7fcc.1203f844.js @@ -1 +1 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[986],{3905:(t,e,r)=>{r.d(e,{Zo:()=>m,kt:()=>f});var n=r(67294);function a(t,e,r){return e in t?Object.defineProperty(t,e,{value:r,enumerable:!0,configurable:!0,writable:!0}):t[e]=r,t}function o(t,e){var r=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return 
Object.getOwnPropertyDescriptor(t,e).enumerable}))),r.push.apply(r,n)}return r}function i(t){for(var e=1;e=0||(a[r]=t[r]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,r)&&(a[r]=t[r])}return a}var p=n.createContext({}),c=function(t){var e=n.useContext(p),r=e;return t&&(r="function"==typeof t?t(e):i(i({},e),t)),r},m=function(t){var e=c(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},d=n.forwardRef((function(t,e){var r=t.components,a=t.mdxType,o=t.originalType,p=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),s=c(r),d=a,f=s["".concat(p,".").concat(d)]||s[d]||u[d]||o;return r?n.createElement(f,i(i({ref:e},m),{},{components:r})):n.createElement(f,i({ref:e},m))}));function f(t,e){var r=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=r.length,i=new Array(o);i[0]=d;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[s]="string"==typeof t?t:a,i[1]=l;for(var c=2;c{r.r(e),r.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>o,metadata:()=>l,toc:()=>p});var n=r(87462),a=(r(67294),r(3905));const o={},i=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.21/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],c={toc:p},m="wrapper";function s(t){let{components:e,...r}=t;return(0,a.kt)(m,(0,n.Z)({},c,r,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"end"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")))))}s.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8462],{3905:(t,e,r)=>{r.d(e,{Zo:()=>m,kt:()=>f});var n=r(7294);function a(t,e,r){return e in t?Object.defineProperty(t,e,{value:r,enumerable:!0,configurable:!0,writable:!0}):t[e]=r,t}function o(t,e){var r=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),r.push.apply(r,n)}return r}function i(t){for(var e=1;e=0||(a[r]=t[r]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,r)&&(a[r]=t[r])}return a}var c=n.createContext({}),p=function(t){var e=n.useContext(c),r=e;return t&&(r="function"==typeof t?t(e):i(i({},e),t)),r},m=function(t){var e=p(t.components);return n.createElement(c.Provider,{value:e},t.children)},s="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},d=n.forwardRef((function(t,e){var r=t.components,a=t.mdxType,o=t.originalType,c=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),s=p(r),d=a,f=s["".concat(c,".").concat(d)]||s[d]||u[d]||o;return r?n.createElement(f,i(i({ref:e},m),{},{components:r})):n.createElement(f,i({ref:e},m))}));function f(t,e){var 
r=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=r.length,i=new Array(o);i[0]=d;var l={};for(var c in e)hasOwnProperty.call(e,c)&&(l[c]=e[c]);l.originalType=t,l[s]="string"==typeof t?t:a,i[1]=l;for(var p=2;p{r.r(e),r.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>o,metadata:()=>l,toc:()=>c});var n=r(7462),a=(r(7294),r(3905));const o={},i=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},c=[],p={toc:c},m="wrapper";function s(t){let{components:e,...r}=t;return(0,a.kt)(m,(0,n.Z)({},p,r,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n 
}\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"end"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/494b7fcc.c5f45e59.js b/assets/js/494b7fcc.c5f45e59.js deleted file mode 100644 index 6c51c606..00000000 --- a/assets/js/494b7fcc.c5f45e59.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8462],{3905:(t,e,r)=>{r.d(e,{Zo:()=>m,kt:()=>f});var n=r(67294);function a(t,e,r){return e in t?Object.defineProperty(t,e,{value:r,enumerable:!0,configurable:!0,writable:!0}):t[e]=r,t}function o(t,e){var r=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),r.push.apply(r,n)}return r}function i(t){for(var e=1;e=0||(a[r]=t[r]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,r)&&(a[r]=t[r])}return a}var c=n.createContext({}),p=function(t){var e=n.useContext(c),r=e;return t&&(r="function"==typeof t?t(e):i(i({},e),t)),r},m=function(t){var e=p(t.components);return n.createElement(c.Provider,{value:e},t.children)},s="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},d=n.forwardRef((function(t,e){var r=t.components,a=t.mdxType,o=t.originalType,c=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),s=p(r),d=a,f=s["".concat(c,".").concat(d)]||s[d]||u[d]||o;return r?n.createElement(f,i(i({ref:e},m),{},{components:r})):n.createElement(f,i({ref:e},m))}));function f(t,e){var r=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=r.length,i=new Array(o);i[0]=d;var l={};for(var c in e)hasOwnProperty.call(e,c)&&(l[c]=e[c]);l.originalType=t,l[s]="string"==typeof t?t:a,i[1]=l;for(var p=2;p{r.r(e),r.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>o,metadata:()=>l,toc:()=>c});var 
n=r(87462),a=(r(67294),r(3905));const o={},i=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},c=[],p={toc:c},m="wrapper";function s(t){let{components:e,...r}=t;return(0,a.kt)(m,(0,n.Z)({},p,r,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n 
}\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"end"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/4ba9c6a2.a2109074.js b/assets/js/4ba9c6a2.a2109074.js deleted file mode 100644 index 3f9f3828..00000000 --- a/assets/js/4ba9c6a2.a2109074.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2027,7857],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=m(n),h=i,u=d["".concat(s,".").concat(h)]||d[h]||c[h]||r;return n?a.createElement(u,o(o({ref:t},p),{},{components:n})):a.createElement(u,o({ref:t},p))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:i,o[1]=l;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var 
a=n(87462),i=(n(67294),n(3905));const r={},o=void 0,l={unversionedId:"data-sources/omim-json",id:"version-3.14/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/omim-json.md",tags:[],version:"3.14",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],m={toc:s},p="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,i.kt)("h4",{id:"phenotype"},"Phenotype"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,i.kt)("h4",{id:"mapping"},"Mapping"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,i.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,i.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,i.kt)("h4",{id:"inheritance"},"Inheritance"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,i.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,i.kt)("h4",{id:"comments"},"Comments"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,i.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,i.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}d.isMDXComponent=!0},26464:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>m});var a=n(87462),i=(n(67294),n(3905)),r=n(27968);const o={title:"OMIM"},l=void 
0,s={unversionedId:"data-sources/omim",id:"version-3.14/data-sources/omim",title:"OMIM",description:"Overview",source:"@site/versioned_docs/version-3.14/data-sources/omim.mdx",sourceDirName:"data-sources",slug:"/data-sources/omim",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/omim",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/omim.mdx",tags:[],version:"3.14",frontMatter:{title:"OMIM"},sidebar:"version-3.14/docs",previous:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mitomap"},next:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/primate-ai"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Parse OMIM data",id:"parse-omim-data",children:[{value:"mim2gene.txt",id:"mim2genetxt",children:[],level:3},{value:"OMIM API",id:"omim-api",children:[{value:"Mapping key to content",id:"mapping-key-to-content",children:[],level:4},{value:"Phenotype character to comment",id:"phenotype-character-to-comment",children:[],level:4}],level:3},{value:"Remove links in OMIM descriptions",id:"remove-links-in-omim-descriptions",children:[],level:3}],level:2},{value:"JSON output",id:"json-output",children:[],level:2}],p={toc:m},d="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 
16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publications")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/30445645/"},"30445645"),"."),(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM\xae), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/25428349/"},"25428349"),"."))),(0,i.kt)("h2",{id:"parse-omim-data"},"Parse OMIM data"),(0,i.kt)("p",null,"Nirvana uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Nirvana. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to a Nirvana gene symbol are further processed. The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols."),(0,i.kt)("h3",{id:"mim2genetxt"},"mim2gene.txt"),(0,i.kt)("p",null,"This mim2gene.txt (",(0,i.kt)("a",{parentName:"p",href:"http://omim.org/static/omim/data/mim2gene.txt"},"http://omim.org/static/omim/data/mim2gene.txt"),") file provides the mapping between MIM numbers and gene symbols. 
An example of this file is given below:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"# MIM Number MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq) Entrez Gene ID (NCBI) Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)\n100050 predominantly phenotypes\n100070 phenotype 100329167\n100100 phenotype\n100200 predominantly phenotypes\n100300 phenotype\n100500 moved/removed\n100600 phenotype\n100640 gene 216 ALDH1A1 ENSG00000165092\n100650 gene/phenotype 217 ALDH2 ENSG00000111275\n100660 gene 218 ALDH3A1 ENSG00000108602\n100670 gene 219 ALDH1B1 ENSG00000137124\n100675 predominantly phenotypes\n100678 gene 39 ACAT2 ENSG00000120437\n")),(0,i.kt)("p",null,'The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns are used to find the proper gene symbol supported by Nirvana, which may or may not be the same as the gene symbol listed here.'),(0,i.kt)("h3",{id:"omim-api"},"OMIM API"),(0,i.kt)("p",null,"Nirvana retrieves the OMIM annotations from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.omim.org/api"},"OMIM API"),' JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. 
A sample JSON response from the API is provided there.'),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "omim": {\n "version": "1.0",\n "entryList": [\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 100640,\n "status": "live",\n "titles": {\n "preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",\n "alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\\nACETALDEHYDE DEHYDROGENASE 1;;\\nALDH, LIVER CYTOSOLIC;;\\nRETINAL DEHYDROGENASE 1; RALDH1"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. 
ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 7709,\n "chromosome": 9,\n "chromosomeSymbol": "9",\n "chromosomeSort": 225,\n "chromosomeLocationStart": 72900670,\n "chromosomeLocationEnd": 72953052,\n "transcript": "ENST00000297785.7",\n "cytoLocation": "9q21",\n "computedCytoLocation": "9q21.13",\n "mimNumber": 100640,\n "geneSymbols": "ALDH1A1",\n "geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",\n "mappingMethod": "REa, A",\n "confidence": "P",\n "mouseGeneSymbol": "Aldh1a1",\n "mouseMgiID": "MGI:1353450",\n "geneInheritance": null\n },\n "externalLinks": {\n "geneIDs": "216",\n "hgncID": "402",\n "ensemblIDs": "ENSG00000165092,ENST00000297785.8",\n "approvedGeneSymbols": "ALDH1A1",\n "ncbiReferenceSequences": "1519246465",\n "proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",\n "uniGenes": "Hs.76392",\n "swissProtIDs": "P00352",\n "decipherGene": false,\n "umlsIDs": "C1412333",\n "gtr": true,\n "cmgGene": false,\n "keggPathways": true,\n "gwasCatalog": false,\n\n }\n }\n },\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 102560,\n "status": "live",\n "titles": {\n "preferredTitle": "ACTIN, GAMMA-1; ACTG1",\n "alternativeTitles": "ACTIN, GAMMA; ACTG;;\\nCYTOSKELETAL GAMMA-ACTIN;;\\nACTIN, CYTOPLASMIC, 2"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. 
Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 13666,\n "chromosome": 17,\n "chromosomeSymbol": "17",\n "chromosomeSort": 947,\n "chromosomeLocationStart": 81509970,\n "chromosomeLocationEnd": 81512798,\n "transcript": "ENST00000331925.7",\n "cytoLocation": "17q25.3",\n "computedCytoLocation": "17q25.3",\n "mimNumber": 102560,\n "geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",\n "geneName": "Actin, gamma-1",\n "mappingMethod": "REa, A, Fd",\n "confidence": "C",\n "mouseGeneSymbol": "Actg1",\n "mouseMgiID": "MGI:87906",\n "geneInheritance": null,\n "phenotypeMapList": [\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Baraitser-Winter syndrome 2",\n "phenotypeMimNumber": 614583,\n "phenotypicSeriesNumber": "PS243310",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n },\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Deafness, autosomal dominant 20/26",\n "phenotypeMimNumber": 604717,\n "phenotypicSeriesNumber": "PS124900",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n }\n ]\n }\n }\n }\n ]\n }\n}\n')),(0,i.kt)("p",null,"Content from the OMIM API JSON response is reorganized as shown in the Nirvana ",(0,i.kt)("a",{parentName:"p",href:"#json-output"},"JSON Output")),(0,i.kt)("p",null,"Mappings between the Nirvana JSON output and OMIM JSON API are listed in the table below:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Nirvana JSON key chain"),(0,i.kt)("th",{parentName:"tr",align:"left"},"OMIM API JSON key 
chain"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:geneName")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (",(0,i.kt)("a",{parentName:"td",href:"#mapping-key-to-content"},"see mapping 
below"),")")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:inheritances"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (",(0,i.kt)("a",{parentName:"td",href:"#phenotype-character-to-comment"},"see mapping below"),")")))),(0,i.kt)("h4",{id:"mapping-key-to-content"},"Mapping key to content"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"1")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"2")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disease phenotype itself was mapped"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"3")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"molecular basis of the disorder is known"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"4")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder is a chromosome deletion or duplication syndrome"),(0,i.kt)("br",null)),(0,i.kt)("h4",{id:"phenotype-character-to-comment"},"Phenotype character to comment"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"?")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"unconfirmed or possibly spurious mapping"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"["),"/",(0,i.kt)("inlineCode",{parentName:"p"},"]")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"nondiseases"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"{"),"/",(0,i.kt)("inlineCode",{parentName:"p"},"}")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"contribute to susceptibility to multifactorial disorders or to susceptibility to 
infection"),(0,i.kt)("br",null)),(0,i.kt)("h3",{id:"remove-links-in-omim-descriptions"},"Remove links in OMIM descriptions"),(0,i.kt)("p",null,"There are different types of link in the OMIM description section. For example, in above JSON response, we have the description of MIM entry 100640:"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).")),(0,i.kt)("p",null,"As the descriptions will be shown as plain text, we remove the curry brackets surrounding links and try to make the text still readable with minimal modifications. Briefly:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},'Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.'),(0,i.kt)("li",{parentName:"ul"},'Links referring to a literature reference will be processed to remove the internal index and curry brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".'),(0,i.kt)("li",{parentName:"ul"},'All the other links will simple have their curry brackets removed. 
For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".'),(0,i.kt)("li",{parentName:"ul"},'If the content within a pair of parentheses becomes empty after being processed, the parentheses need to be removed as well and its surrounding white spaces should be properly processed. For example, "ALDH2 ({100650})," will become "ALDH2,".')),(0,i.kt)("p",null,"Here is a list of examples about how the description section supposed to be processed:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Original text"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Processed text"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"({516030}, {516040}, and {516050})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1, {168461}; D2, {123833}; D3, {123834})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1; D2; D3)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2, {125645})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., see {102700}, {300755})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH). 
See also liver mitochondrial ALDH2")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A; {601011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1; {138359}), mu (e.g., {138350})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1), mu")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB; see {164011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G, {147574})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; {EC 2.7.1.74}; {125450})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; EC 2.7.1.74)")))),(0,i.kt)("h2",{id:"json-output"},"JSON output"),(0,i.kt)(r.default,{mdxType:"JSON"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/4bfcd97d.ee571746.js b/assets/js/4bfcd97d.ee571746.js deleted file mode 100644 index 4c0718f3..00000000 --- a/assets/js/4bfcd97d.ee571746.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1155],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function a(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var 
i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,o=e.mdxType,a=e.originalType,i=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=o,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||a;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var a=n.length,l=new Array(a);l[0]=d;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:o,l[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>a,metadata:()=>c,toc:()=>i});var r=n(87462),o=(n(67294),n(3905));const a={},l=void 0,c={unversionedId:"data-sources/phylop-json",id:"version-3.14/data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/phylop-json.md",tags:[],version:"3.14",frontMatter:{}},i=[],p={toc:i},s="wrapper";function u(e){let{components:t,...n}=e;return(0,o.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] 
\n')),(0,o.kt)("table",null,(0,o.kt)("thead",{parentName:"table"},(0,o.kt)("tr",{parentName:"thead"},(0,o.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,o.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,o.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,o.kt)("tbody",{parentName:"table"},(0,o.kt)("tr",{parentName:"tbody"},(0,o.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,o.kt)("td",{parentName:"tr",align:"center"},"float"),(0,o.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/4c015796.5206e8fe.js b/assets/js/4c015796.5206e8fe.js deleted file mode 100644 index 7cbaa803..00000000 --- a/assets/js/4c015796.5206e8fe.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6357],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var 
n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),m=d(n),h=i,u=m["".concat(s,".").concat(h)]||m[h]||c[h]||r;return n?a.createElement(u,o(o({ref:t},p),{},{components:n})):a.createElement(u,o({ref:t},p))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[m]="string"==typeof e?e:i,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={title:"Mitochondrial Heteroplasmy"},o=void 0,l={unversionedId:"data-sources/mito-heteroplasmy",id:"version-3.14/data-sources/mito-heteroplasmy",title:"Mitochondrial Heteroplasmy",description:"Overview",source:"@site/versioned_docs/version-3.14/data-sources/mito-heteroplasmy.md",sourceDirName:"data-sources",slug:"/data-sources/mito-heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mito-heteroplasmy",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/mito-heteroplasmy.md",tags:[],version:"3.14",frontMatter:{title:"Mitochondrial Heteroplasmy"},sidebar:"version-3.14/docs",previous:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/gnomad"},next:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mitomap"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"JSON File",id:"json-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Binning VRF Data",id:"binning-vrf-data",children:[],level:4},{value:"Pre-processing the Data",id:"pre-processing-the-data",children:[],level:4},{value:"Algorithm",id:"algorithm",children:[],level:4}],level:3}],level:2},{value:"Download 
URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline."),(0,i.kt)("h2",{id:"json-file"},"JSON File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "T:C":{\n "ad":[\n 1,\n 1,\n 1,\n 1,\n 1,\n 1\n ],\n "allele_type":"alt",\n "vrf":[\n 0.002369668246445498,\n 0.0024937655860349127,\n 0.0016129032258064516,\n 0.0025188916876574307,\n 0.0022935779816513763,\n 0.002008032128514056\n ],\n "vrf_stats":{\n "kurtosis":38.889891511122556,\n "max":0.0025188916876574307,\n "mean":5.4052190471990743e-05,\n "min":0.0,\n "nobs":246,\n "skewness":6.346664692283075,\n "stdev":0.0003461416264750575,\n "variance":1.1981402557879823e-07\n }\n }\n}\n\n')),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the JSON file, we're mainly interested in the following keys:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"variant")," (i.e. 
",(0,i.kt)("inlineCode",{parentName:"li"},"T:C"),")"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ad")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"vrf")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"nobs")," (number of observations)")),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Adjusting for null observations")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The ",(0,i.kt)("inlineCode",{parentName:"p"},"nobs")," value indicates how many observations were made. Ideally this would have been represented in the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," arrays, but it's left as an exercise for the reader."))),(0,i.kt)("h4",{id:"binning-vrf-data"},"Binning VRF Data"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," (variant read frequency) array in the JSON object above is paired with with the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," array (allele depths) shown above."),(0,i.kt)("p",null,"The data in the JSON object has a crazy number of significant digits. This means that as the number of samples increase, this array will grow. 
To make this more future-proof, Nirvana bins everything according to 0.1% increments."),(0,i.kt)("p",null,"With the binned data, we end up having 775 distinct ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143."),(0,i.kt)("h4",{id:"pre-processing-the-data"},"Pre-processing the Data"),(0,i.kt)("p",null,"The JSON file is converted into a small TSV file that is ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/blob/main/MitoHeteroplasmy/Resources/MitoHeteroplasmy.tsv.gz"},"embedded in Nirvana"),". Here is an example of the TSV file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS REF ALT VRF_BINS VRF_COUNTS\nchrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\nchrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\n")),(0,i.kt)("h4",{id:"algorithm"},"Algorithm"),(0,i.kt)("p",null,"Nirvana will calculate mitochondrial heteroplasmy data for every sample in the VCF. 
Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Percentiles")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana uses the ",(0,i.kt)("a",{parentName:"p",href:"https://en.wikipedia.org/wiki/Percentile"},"statistical definition of percentile")," (indicating the value below which a given percentage of observations in a group of observations falls). 
Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1)."))),(0,i.kt)("h2",{id:"download-url"},"Download URL"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unavailable")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The original data set is only available internally at Illumina at the moment."))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{14-17}","{14-17}":!0},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"one percentile for each variant frequency (each alternate allele)")))))}m.isMDXComponent=!0}}]); \ No newline at 
end of file diff --git a/assets/js/4c1c9794.b4c2c7eb.js b/assets/js/4c1c9794.b4c2c7eb.js deleted file mode 100644 index 7d8c269c..00000000 --- a/assets/js/4c1c9794.b4c2c7eb.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1262,357],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var l=a.createContext({}),c=function(e){var t=a.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(l.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),d=c(n),u=r,v=d["".concat(l,".").concat(u)]||d[u]||m[u]||i;return n?a.createElement(v,o(o({ref:t},p),{},{components:n})):a.createElement(v,o({ref:t},p))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s[d]="string"==typeof e?e:r,o[1]=s;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>s,toc:()=>l});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 
0,s={unversionedId:"data-sources/primate-ai-json",id:"version-3.14/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/primate-ai-json.md",tags:[],version:"3.14",frontMatter:{}},l=[],c={toc:l},p="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}d.isMDXComponent=!0},94870:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>m,frontMatter:()=>o,metadata:()=>l,toc:()=>c});var a=n(87462),r=(n(67294),n(3905)),i=n(39038);const o={title:"Primate AI"},s=void 0,l={unversionedId:"data-sources/primate-ai",id:"version-3.14/data-sources/primate-ai",title:"Primate 
AI",description:"Overview",source:"@site/versioned_docs/version-3.14/data-sources/primate-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/primate-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/primate-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/primate-ai.mdx",tags:[],version:"3.14",frontMatter:{title:"Primate AI"},sidebar:"version-3.14/docs",previous:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/omim"},next:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/phylop"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"TSV File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Converting UCSC IDs",id:"converting-ucsc-ids",children:[],level:3},{value:"Running the Pre-Processor",id:"running-the-pre-processor",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],p={toc:c},d="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations. 
The method is described in the publication:"),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. ",(0,r.kt)("em",{parentName:"p"},"Nat Genet")," ",(0,r.kt)("strong",{parentName:"p"},"50"),", 1161\u20131170 (2018). ",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/s41588-018-0167-z"},"https://doi.org/10.1038/s41588-018-0167-z")))),(0,r.kt)("h2",{id:"tsv-file"},"TSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr pos ref alt refAA altAA strand_1pos_0neg trinucleotide_context UCSC_gene ExAC_coverage primateDL_score\nchr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239\nchr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the TSV file, we're mainly interested in the following 
columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"primateDL_score"))),(0,r.kt)("p",null,"We also use ",(0,r.kt)("inlineCode",{parentName:"p"},"UCSC_gene")," to filter out variants that don't have matching gene models in Nirvana."),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"converting-ucsc-ids"},"Converting UCSC IDs"),(0,r.kt)("p",null,"Primate AI only provides UCSC IDs. As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs."),(0,r.kt)("p",null,"The following queries are used to download the conversions from UCSC:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},'mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv\n\nmysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \\\n hg19 > ucsc_ensembl.tsv\n')),(0,r.kt)("h3",{id:"running-the-pre-processor"},"Running the Pre-Processor"),(0,r.kt)("p",null,"The Primate AI pre-processor can be run as follows:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \\\n ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz\n")),(0,r.kt)("p",null,"During conversion, 0.5% of the UCSC Ids cannot be converted to either Entrez or Ensembl gene IDs. 
Once the gene IDs have been acquired, we check to see which are available in Nirvana."),(0,r.kt)("p",null,"The following Entrez Gene IDs were not found:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"399753\n401980\n504189\n504191\n100293534\n")),(0,r.kt)("p",null,"Here is the output from the pre-processor:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.\n- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.\n- loading UGA gene ID to gene dictionary... 103,277 genes loaded.\n- parsing Primate AI variants... 70,121,953 variants parsed.\n \n# variants with unknown gene ID: 27,253 / 70,121,953\n# genes with unknown gene ID: 109 / 19,614\n \n# variants not in UGA: 2,036 / 70,121,953\n# genes not in UGA: 6 / 19,614\n")),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"The Primate AI data set provides raw scores, but the scores are biased according to gene context. I.e. 
a 0.4 means something different in ",(0,r.kt)("inlineCode",{parentName:"p"},"TP53")," than it does in ",(0,r.kt)("inlineCode",{parentName:"p"},"KRAS"),"."),(0,r.kt)("p",{parentName:"div"},"As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. According to their research, the 25",(0,r.kt)("sup",null,"th")," percentile is a good proxy for benign variants and the 75",(0,r.kt)("sup",null,"th")," percentile is a good proxy for pathogenic variants."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/cPgCSmecvhb4"},"https://basespace.illumina.com/s/cPgCSmecvhb4")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/4d2a7d8e.150708a6.js b/assets/js/4d2a7d8e.150708a6.js deleted file mode 100644 index 91c65499..00000000 --- a/assets/js/4d2a7d8e.150708a6.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1932,5337,9151,3425,1790,9383,8660],{3905:(t,e,a)=>{a.d(e,{Zo:()=>d,kt:()=>k});var n=a(67294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),m=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},d=function(t){var e=m(t.components);return 
n.createElement(p.Provider,{value:e},t.children)},s="mdxType",g={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,d=o(t,["components","mdxType","originalType","parentName"]),s=m(a),N=r,k=s["".concat(p,".").concat(N)]||s[N]||g[N]||l;return a?n.createElement(k,i(i({ref:e},d),{},{components:a})):n.createElement(k,i({ref:e},d))}));function k(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=N;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[s]="string"==typeof t?t:r,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.2.5/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.2.5",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n 
"sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}s.isMDXComponent=!0},50092:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.2.5/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.2.5",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0},86631:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clinvar-json",id:"version-3.2.5/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/clinvar-json.md",tags:[],version:"3.2.5",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"RCV000030258.4",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM 
IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single 
variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}s.isMDXComponent=!0},59999:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.2.5/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.2.5/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/dbsnp-json.md",tags:[],version:"3.2.5",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}s.isMDXComponent=!0},92811:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-exomes-small-variants-json",id:"version-3.2.5/data-sources/gnomad-exomes-small-variants-json",title:"gnomad-exomes-small-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.2.5/data-sources/gnomad-exomes-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-exomes-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/gnomad-exomes-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/gnomad-exomes-small-variants-json.md",tags:[],version:"3.2.5",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomadExome":{ \n "coverage":20,\n "allAf":0.190317,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coverage"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer 
values)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")))))}s.isMDXComponent=!0},64501:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-genomes-small-variants-json",id:"version-3.2.5/data-sources/gnomad-genomes-small-variants-json",title:"gnomad-genomes-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/gnomad-genomes-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-genomes-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/gnomad-genomes-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/gnomad-genomes-small-variants-json.md",tags:[],version:"3.2.5",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coverage"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. 
Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. 
Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")))))}s.isMDXComponent=!0},16712:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>g,default:()=>f,frontMatter:()=>s,metadata:()=>N,toc:()=>k});var n=a(87462),r=(a(67294),a(3905)),l=a(86631),i=a(59999),o=a(64501),p=a(92811),m=a(22166),d=a(50092);const s={title:"Nirvana JSON File Format"},g=void 0,N={unversionedId:"file-formats/nirvana-json-file-format",id:"version-3.2.5/file-formats/nirvana-json-file-format",title:"Nirvana JSON File Format",description:"Overview",source:"@site/versioned_docs/version-3.2.5/file-formats/nirvana-json-file-format.mdx",sourceDirName:"file-formats",slug:"/file-formats/nirvana-json-file-format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/file-formats/nirvana-json-file-format",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/file-formats/nirvana-json-file-format.mdx",tags:[],version:"3.2.5",frontMatter:{title:"Nirvana JSON File Format"},sidebar:"version-3.2.5/docs",previous:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/gnomad"},next:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/core-functionality/variant-ids"}},k=[{value:"Overview",id:"overview",children:[{value:"Conventions",id:"conventions",children:[],level:3},{value:"JSON Layout",id:"json-layout",children:[],level:3}],level:2},{value:"Header",id:"header",children:[{value:"Data Source",id:"data-source",children:[],level:4},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:4}],level:2},{value:"Positions",id:"positions",children:[{value:"1000 Genomes 
(SV)",id:"1000-genomes-sv",children:[],level:3}],level:2},{value:"Samples",id:"samples",children:[],level:2},{value:"Variants",id:"variants",children:[{value:"Transcripts",id:"transcripts",children:[{value:"PolyPhen",id:"polyphen",children:[],level:4},{value:"SIFT",id:"sift",children:[],level:4},{value:"Gene Fusions",id:"gene-fusions",children:[],level:4},{value:"Fusion",id:"fusion",children:[],level:4}],level:3},{value:"Regulatory Regions",id:"regulatory-regions",children:[{value:"Regulatory Types",id:"regulatory-types",children:[],level:4},{value:"Regulatory Consequences",id:"regulatory-consequences",children:[],level:4}],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3},{value:"1000 Genomes",id:"1000-genomes",children:[],level:3},{value:"gnomAD (genomes)",id:"gnomad-genomes",children:[],level:3},{value:"gnomAD (exomes)",id:"gnomad-exomes",children:[],level:3},{value:"dbSNP",id:"dbsnp",children:[],level:3}],level:2}],u={toc:k},c="wrapper";function f(t){let{components:e,...s}=t;return(0,r.kt)(c,(0,n.Z)({},u,s,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("h3",{id:"conventions"},"Conventions"),(0,r.kt)("p",null,"In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. As such, we have several conventions that are useful to know about:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display ",(0,r.kt)("inlineCode",{parentName:"li"},'"isStructuralVariant":false')," a few million times when annotating a small variant VCF."),(0,r.kt)("li",{parentName:"ul"},"When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. 
Nirvana treats periods like empty or null strings and therefore will not output those entries.")),(0,r.kt)("h3",{id:"json-layout"},"JSON Layout"),(0,r.kt)("p",null,(0,r.kt)("img",{src:a(40122).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"In general, each position corresponds to a row in the original VCF file."),(0,r.kt)("p",{parentName:"div"},"For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section."))),(0,r.kt)("h2",{id:"header"},"Header"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'{ \n "header":{ \n "annotator":"Nirvana 3.2.5",\n "creationTime":"2022-12-05 16:43:41",\n "genomeAssembly":"GRCh37",\n "schemaVersion":6,\n "dataVersion":"91.26.50",\n "dataSources":[ \n { \n "name":"VEP",\n "version":"91",\n "description":"RefSeq",\n "releaseDate":"2018-03-05"\n },\n { \n "name":"ClinVar",\n "version":"20190204",\n "description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",\n "releaseDate":"2019-02-04"\n }\n ],\n "samples":[ \n "NA12878",\n "NA12891",\n "NA12892"\n ]\n 
},\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotator"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the name of the annotator and the current version")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"creationTime"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd hh:mm:ss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genomeAssembly"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#genome-assemblies"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"schemaVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"incremented whenever the core structure of the JSON file introduces breaking changes")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#data-source"},"Data Source entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"samples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the order of these sample names will be used throughout the JSON file when enumerating samples")))),(0,r.kt)("h4",{id:"data-source"},"Data Source"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"version"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"optional description of the data source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"releaseDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")))),(0,r.kt)("h4",{id:"genome-assemblies"},"Genome Assemblies"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"GRCh37"),(0,r.kt)("li",{parentName:"ul"},"GRCh38"),(0,r.kt)("li",{parentName:"ul"},"hg19")),(0,r.kt)("h2",{id:"positions"},"Positions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"positions":[ \n { \n "chromosome":"chr2",\n "position":48010488,\n "repeatUnit":"GGCCCC",\n "refRepeatCount":3,\n "svEnd":48020488,\n "refAllele":"G",\n "altAlleles":[ \n "A",\n "GT"\n ],\n "quality":461,\n "filters":[ \n "PASS"\n ],\n "ciPos":[ \n -170,\n 170\n ],\n "ciEnd":[ \n -175,\n 175\n ],\n "svLength":1000,\n 
"strandBias":1.23,\n "jointSomaticNormalQuality":29,\n "cytogeneticBand":"2p16.3",\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Variant Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"postion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (1-based notation). 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnit"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refRepeatCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"quality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (Normally an integer, but some variant callers using floating point. 
Has been observed as high as 500k)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"filters"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svLength"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"strandBias"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"small variant"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by GATK (from SB)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"jointSomaticNormalQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by the Manta variant caller (SOMATICSCORE)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cytogeneticBand"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"e.g. 
17p13.1")))),(0,r.kt)("h3",{id:"1000-genomes-sv"},"1000 Genomes (SV)"),(0,r.kt)(d.default,{mdxType:"ThousandGenomesSV"}),(0,r.kt)("h2",{id:"samples"},"Samples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n "totalDepth":57,\n "genotypeQuality":12,\n "copyNumber":3,\n "repeatUnitCounts":[\n 10,\n 20\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "failedFilter":true,\n "splitReadCounts":[\n 10,\n 20\n ],\n "pairedEndReadCounts":[\n 10,\n 20\n ],\n "diseaseAffectedStatuses":[\n "-"\n ],\n "artifactAdjustedQualityScore":89.3,\n "likelihoodRatioQualityScore":78.2\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotype"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatNumbers"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatNumberSpans"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantFrequencies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"totalDepth"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotypeQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values. Typically maxes out at 99")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"copyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleDepths"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"splitReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pairedEndReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lossOfHeterozygosity"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"deNovoQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mpileupAlleleDepths"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"SMN1-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"silentCarrierHaplotype"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"SMN1-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"paralogousEntrezGeneIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"SMN1-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"paralogousGeneCopyNumbers"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"SMN1-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseaseClassificationSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"SMN1-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseaseIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"SMN1-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseaseAffectedStatuses"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"SMN1-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinAlteringVariantPositions"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"SMN1-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isCompoundHetCompatible"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"SMN1-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"artifactAdjustedQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"likelihoodRatioQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. 
Range: 0 - 100.0")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Empty Samples")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"If a sample does not contain any entries, we will create a sample object that contains the ",(0,r.kt)("inlineCode",{parentName:"p"},"isEmpty")," key. This ensures that sample ordering is preserved while indicating that a sample is intentionally empty."),(0,r.kt)("pre",{parentName:"div"},(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[ \n { \n "isEmpty":true\n }\n],\n')))),(0,r.kt)("h2",{id:"variants"},"Variants"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[ \n { \n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "isReferenceMinorAllele":true,\n "isStructuralVariant":true,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "isRecomposedVariant":true,\n "hgvsg":"NC_000002.11:g.48010488G>A",\n 
"phylopScore":0.459\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"vid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"Variant Identifiers"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReferenceMinorAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a reference minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isStructuralVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a structural variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the reference allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the alternate allele.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"uses\xa0",(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"Sequence Ontology sequence alterations"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the decomposed variant has been used to create another recomposed variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isRecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is recomposed from two or more decomposed 
variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsg"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS g. notation")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phyloP conservation score. Range: -14.08 to 6.424")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Reference Minor Alleles")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Nirvana supports annotating reference minor alleles. 
In such a case, ",(0,r.kt)("inlineCode",{parentName:"p"},"refAllele")," will be replaced by the global major allele and ",(0,r.kt)("inlineCode",{parentName:"p"},"altAllele")," will be replaced with the original reference allele."))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Flagging Decomposed & Recomposed Variants")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isDecomposedVariant":true'),"."),(0,r.kt)("p",{parentName:"div"},"Similarly, the recomposed variant will be shown as a new VCF position. 
This recomposed variant will be flagged with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isRecomposedVariant":true'),"."))),(0,r.kt)("h3",{id:"transcripts"},"Transcripts"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"transcripts":[\n {\n "transcript":"ENST00000445503.1",\n "source":"Ensembl",\n "bioType":"nonsense_mediated_decay",\n "codons":"gGg/gAg",\n "aminoAcids":"G/E",\n "cdnaPos":"268",\n "cdsPos":"116",\n "exons":"1/9",\n "introns":"1/8",\n "proteinPos":"39",\n "geneId":"ENSG00000116062",\n "hgnc":"MSH6",\n "consequence":[\n "missense_variant",\n "NMD_transcript_variant"\n ],\n "hgvsc":"ENST00000445503.1:c.116G>A",\n "hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",\n "geneFusion":{\n "exon":6,\n "intron":5,\n "fusions":[\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",\n "exon":3,\n "intron":2\n },\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",\n "exon":2,\n "intron":1\n }\n ]\n },\n "isCanonical":true,\n "polyPhenScore":0.95,\n "polyPhenPrediction":"probably damaging",\n "proteinId":"ENSP00000405294.1",\n "siftScore":0.61,\n "siftPrediction":"tolerated",\n "completeOverlap":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript ID. e.g. 
ENST00000445503.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"source"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"RefSeq / Ensembl")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,r.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"codons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdnaPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdsPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exons affected by the variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"introns"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"introns affected by the 
variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/index.html"},"Sequence Ontology Consequences"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS protein nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneFusion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#gene-fusions"},"Gene Fusions entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isCanonical"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a canonical 
transcript")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#polyphen"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"protein ID. E.g. ENSP00000405294.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#sift"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"completeOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this transcript is completely overlapped by the variant")))),(0,r.kt)("h4",{id:"polyphen"},"PolyPhen"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"probably damaging"),(0,r.kt)("li",{parentName:"ul"},"possibly damaging"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"unknown")),(0,r.kt)("h4",{id:"sift"},"SIFT"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"tolerated"),(0,r.kt)("li",{parentName:"ul"},"deleterious"),(0,r.kt)("li",{parentName:"ul"},"tolerated - low 
confidence"),(0,r.kt)("li",{parentName:"ul"},"deleterious - low confidence")),(0,r.kt)("h4",{id:"gene-fusions"},"Gene Fusions"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"fusions"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#fusion"},"Fusion entry below"))))),(0,r.kt)("h4",{id:"fusion"},"Fusion"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the other breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the other breakpoint was 
located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature describing the two genes and the transcripts that are fused along with")))),(0,r.kt)("h3",{id:"regulatory-regions"},"Regulatory Regions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"regulatoryRegions":[ \n { \n "id":"ENSR00001542175",\n "type":"promoter",\n "consequence":[ \n "regulatory_region_variant"\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"type"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-types"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-consequences"},"possible values below"))))),(0,r.kt)("h4",{id:"regulatory-types"},"Regulatory 
Types"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CTCF_binding_site"),(0,r.kt)("li",{parentName:"ul"},"enhancer"),(0,r.kt)("li",{parentName:"ul"},"open_chromatin_region"),(0,r.kt)("li",{parentName:"ul"},"promoter"),(0,r.kt)("li",{parentName:"ul"},"promoter_flanking_region"),(0,r.kt)("li",{parentName:"ul"},"TF_binding_site")),(0,r.kt)("h4",{id:"regulatory-consequences"},"Regulatory Consequences"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"regulatory_region_variant"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_ablation"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_amplification"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_truncation")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)(l.default,{mdxType:"ClinVar"}),(0,r.kt)("h3",{id:"1000-genomes"},"1000 Genomes"),(0,r.kt)(m.default,{mdxType:"ThousandGenomesSmall"}),(0,r.kt)("h3",{id:"gnomad-genomes"},"gnomAD (genomes)"),(0,r.kt)(o.default,{mdxType:"GnomadGenomesSmall"}),(0,r.kt)("h3",{id:"gnomad-exomes"},"gnomAD (exomes)"),(0,r.kt)(p.default,{mdxType:"GnomadExomesSmall"}),(0,r.kt)("h3",{id:"dbsnp"},"dbSNP"),(0,r.kt)(i.default,{mdxType:"DbSNP"}))}f.isMDXComponent=!0},40122:(t,e,a)=>{a.d(e,{Z:()=>n});const n=a.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/4d3acc1e.2f259c87.js b/assets/js/4d3acc1e.2f259c87.js deleted file mode 100644 index 4c9bcc15..00000000 --- a/assets/js/4d3acc1e.2f259c87.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6766],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function s(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),l=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):s(s({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(i,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,s(s({ref:t},p),{},{components:n})):r.createElement(f,s({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,s=new Array(o);s[0]=m;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:a,s[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const o={},s=void 0,c={unversionedId:"data-sources/dbsnp-json",id:"version-3.17/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/dbsnp-json.md",tags:[],version:"3.17",frontMatter:{}},i=[],l={toc:i},p="wrapper";function 
u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/4d3b5a47.abb58158.js b/assets/js/4d3b5a47.abb58158.js deleted file mode 100644 index d1466ba5..00000000 --- a/assets/js/4d3b5a47.abb58158.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5862],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return 
a.createElement(a.Fragment,{},e)}},c=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),s=u(n),c=r,g=s["".concat(p,".").concat(c)]||s[c]||d[c]||l;return n?a.createElement(g,o(o({ref:e},m),{},{components:n})):a.createElement(g,o({ref:e},m))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.21/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],u={toc:p},m="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/4db31704.fff51b66.js b/assets/js/4db31704.fff51b66.js deleted file mode 100644 index b49437df..00000000 --- a/assets/js/4db31704.fff51b66.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4317],{3905:(t,e,a)=>{a.d(e,{Zo:()=>p,kt:()=>g});var n=a(67294);function l(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function r(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(l[a]=t[a]);return l}(t,e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(l[a]=t[a])}return l}var s=n.createContext({}),m=function(t){var e=n.useContext(s),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},p=function(t){var e=m(t.components);return n.createElement(s.Provider,{value:e},t.children)},d="mdxType",k={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,l=t.mdxType,r=t.originalType,s=t.parentName,p=o(t,["components","mdxType","originalType","parentName"]),d=m(a),N=l,g=d["".concat(s,".").concat(N)]||d[N]||k[N]||r;return a?n.createElement(g,i(i({ref:e},p),{},{components:a})):n.createElement(g,i({ref:e},p))}));function g(t,e){var a=arguments,l=e&&e.mdxType;if("string"==typeof t||l){var r=a.length,i=new Array(r);i[0]=N;var o={};for(var s in 
e)hasOwnProperty.call(e,s)&&(o[s]=e[s]);o.originalType=t,o[d]="string"==typeof t?t:l,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var n=a(87462),l=(a(67294),a(3905));const r={title:"Custom Annotations"},i=void 0,o={unversionedId:"file-formats/custom-annotations",id:"version-3.16/file-formats/custom-annotations",title:"Custom Annotations",description:"Overview",source:"@site/versioned_docs/version-3.16/file-formats/custom-annotations.md",sourceDirName:"file-formats",slug:"/file-formats/custom-annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/file-formats/custom-annotations",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/file-formats/custom-annotations.md",tags:[],version:"3.16",frontMatter:{title:"Custom Annotations"},sidebar:"version-3.16/docs",previous:{title:"Nirvana JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/file-formats/nirvana-json-file-format"},next:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/canonical-transcripts"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Variant File Format",id:"variant-file-format",children:[{value:"Basic Allele Frequency Example",id:"basic-allele-frequency-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv",children:[],level:4},{value:"Convert to Nirvana Format",id:"convert-to-nirvana-format",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results",children:[],level:4}],level:3},{value:"Categories & Descriptions Example",id:"categories--descriptions-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-1",children:[],level:4},{value:"Annotate with 
Nirvana",id:"annotate-with-nirvana-1",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-1",children:[],level:4},{value:"Using Positional Matches",id:"using-positional-matches",children:[],level:4}],level:3},{value:"Genomic Region Example",id:"genomic-region-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-2",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-2",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-2",children:[],level:4}],level:3},{value:"Mixing Small Variants and Genomic Regions",id:"mixing-small-variants-and-genomic-regions",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-3",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-3",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-3",children:[],level:4}],level:3}],level:2},{value:"Gene File Format",id:"gene-file-format",children:[{value:"Basic Gene Example",id:"basic-gene-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-4",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-4",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-4",children:[],level:4}],level:3}],level:2},{value:"Customizing the Header",id:"customizing-the-header",children:[{value:"Title",id:"title",children:[],level:3},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:3},{value:"Matching Criteria",id:"matching-criteria",children:[],level:3},{value:"Categories",id:"categories",children:[],level:3},{value:"Descriptions",id:"descriptions",children:[{value:"Populations",id:"populations",children:[],level:4}],level:3},{value:"Data Types",id:"data-types",children:[],level:3}],level:2},{value:"Using SAUtils",id:"using-sautils",children:[{value:"Convert Variant 
File",id:"convert-variant-file",children:[],level:3},{value:"Convert Gene File",id:"convert-gene-file",children:[],level:3}],level:2}],m={toc:s},p="wrapper";function d(t){let{components:e,...a}=t;return(0,l.kt)(p,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"While the team tries to keep data sources up-to-date, you might want to start incorporate new annotations ahead of our update cycle. Another\ncommon use case involves protected health information (PHI). Custom annotations are a mechanism that enables both use cases."),(0,l.kt)("p",null,"Here are some examples of how our collaborators use custom annotations:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"associating context from both a patient-level and a patient cohort level with the variant annotations"),(0,l.kt)("li",{parentName:"ul"},"adding content that is licensed (e.g. HGMD) to the variant annotations")),(0,l.kt)("p",null,"At the moment, we have two different custom annotation file formats. One provides additional annotations to variants (both small variants and SVs)\nwhile the other caters to gene annotations."),(0,l.kt)("p",null,"In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data."),(0,l.kt)("p",null,"The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how\nNirvana should match the variants."),(0,l.kt)("p",null,"At Illumina, there are usually many components downstream of Nirvana that have to parse our annotations. If a customer provides a custom\nannotation, those downstream tools need to understand more about the data such as:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"data type (e.g. number, boolean, or a string)"),(0,l.kt)("li",{parentName:"ul"},"data category (e.g. 
is this an allele count, allele number, allele frequency, etc.)"),(0,l.kt)("li",{parentName:"ul"},"associated population (i.e. if this is an allele frequency)")),(0,l.kt)("p",null,"For each custom annotation, Nirvana uses this context to create a ",(0,l.kt)("a",{parentName:"p",href:"https://json-schema.org/"},"JSON schema")," that can be sent to downstream tools. If\na tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of ","[0, 1]","."),(0,l.kt)("h2",{id:"variant-file-format"},"Variant File Format"),(0,l.kt)("h3",{id:"basic-allele-frequency-example"},"Basic Allele Frequency Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Imagine that you want to create a basic allele frequency custom annotation for small variants. If we visualized the tab-delimited file\n(TSV), it would look something like this:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 
5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"lef
t"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over the header and discuss the contents:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"title")," indicates the name of the JSON key"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"assembly")," indicates that this data is only valid for ",(0,l.kt)("inlineCode",{parentName:"li"},"GRCh38")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"matchVariantsBy")," indicates that we should only match the annotations if they are allele-specific"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"categories")," provides hints to downstream tools on how they might want to treat the data. 
In this case, we indicate that it's an allele\nfrequency."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"descriptions")," are used in special circumstances to provide more context. Even though column 5 is called ",(0,l.kt)("inlineCode",{parentName:"li"},"allAf"),", it might not be clear to a\ndownstream tool that this means a global allele frequency using all sub-populations. In this case, ",(0,l.kt)("inlineCode",{parentName:"li"},"ALL")," indicates the intended population."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"type")," indicates to downstream tools the data type. Since allele frequencies are numbers, we'll write ",(0,l.kt)("inlineCode",{parentName:"li"},"number")," in this column.")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Reference Base Checking")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Nirvana validates all the reference bases in a custom annotation. 
If a variant or genomic region is specified that has the wrong reference base, an error will be produced."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"The variants within each chromosome must be sorted by genomic position."))),(0,l.kt)("h4",{id:"convert-to-nirvana-format"},"Convert to Nirvana Format"),(0,l.kt)("p",null,"First we need to convert the TSV file to Nirvana's native file format and let's put that file in a new directory called CA:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"$ mkdir CA\n$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA\n---------------------------------------------------------------------------\nSAUtils (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nChromosome 16 completed in 00:00:00.1\nChromosome 19 completed in 00:00:00.0\n\nTime: 00:00:00.2\n")),(0,l.kt)("h4",{id:"annotate-with-nirvana"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's annotate the following VCF (notice that it's one of the variants that we have in our custom 
annotation):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 68801894 . G A . . .\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,"Since Nirvana can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to\nthe normal Nirvana command-line."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash",metastring:"{3}","{3}":!0},"$ dotnet bin/Release/netcoreapp2.1/Nirvana.dll -c Data/Cache/GRCh38/Both \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \\\n --sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA\n---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.8\nSA Position Scan 00:00:00.0 19\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr16 00:00:00.2 00:00:01.3 1\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:01.9 25.5 %\nPreload 00:00:00.2 3.3 %\nAnnotation 00:00:01.3 18.2 %\n\nTime: 00:00:06.3\n")),(0,l.kt)("h4",{id:"investigate-the-results"},"Investigate the Results"),(0,l.kt)("p",null,"We would expect the following data to show up in our JSON output file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-16}","{12-16}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 
68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"Nirvana preserves up to 6 decimal places for allele frequency data."),(0,l.kt)("h3",{id:"categories--descriptions-example"},"Categories & Descriptions Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-1"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Building on the previous example, we can add other types of annotations like predictions and general notes."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 6"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 
7"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,l.kt)("td",{parentName:"tr",align:"left"},"pathogenicity"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr"
,align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579"),(0,l.kt)("td",{parentName:"tr",align:"left"},"P"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569"),(0,l.kt)("td",{parentName:"tr",align:"left"},"LP"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in case 
123")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource2.tsv"},"the full TSV file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Placeholders")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). 
While\nNirvana also accepts empty columns in the TSV file, we use them in these examples to promote readability."))),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 6")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"pathogenicity")," which uses the ",(0,l.kt)("inlineCode",{parentName:"li"},"Prediction")," category. When using this category, Nirvana will\nvalidate to make\nsure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic)."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 7")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes")," and it doesn't have a category or description. We're just going to use it to add some internal\nnotes.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-1"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the\nalternate allele (allele-specific match):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 23603511 . TG T . . .\n16 68801894 . G A . . .\n19 11107436 . G C . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA2.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-1"},"Investigate the Results"),(0,l.kt)("p",null,"Because we specified ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," in our custom annotation file, only the middle variant will get an annotation:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-18}","{12-18}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123"\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA2.json.gz"},"the full JSON file"),"."),(0,l.kt)("h4",{id:"using-positional-matches"},"Using Positional Matches"),(0,l.kt)("p",null,"What would happen if we changed to ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position"),"? Two things will happen. 
First, our positional variants will now match:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-17}","{12-17}":!0},' "variants": [\n {\n "vid": "16-23603511-TG-T",\n "chromosome": "16",\n "begin": 23603512,\n "end": 23603512,\n "refAllele": "G",\n "altAllele": "-",\n "variantType": "deletion",\n "hgvsg": "NC_000016.10:g.23603512delG",\n "MyDataSource": [\n {\n "refAllele": "GA",\n "altAllele": "-",\n "allAf": 7e-06,\n "pathogenicity": "P"\n }\n ],\n "clinvar": [\n')),(0,l.kt)("p",null,"In addition, you will now see an extra flag for our allele-specific variant:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-20}","{12-20}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": [\n {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123",\n "isAlleleSpecific": true\n }\n ],\n "clinvar": [\n')),(0,l.kt)("h3",{id:"genomic-region-example"},"Genomic Region Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-2"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. 
Consider the following example:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0
,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"20000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"70000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Lots of false positives in this region")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource3.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes"),". In essence, it looks exactly like column 7 from our previous example."),(0,l.kt)("li",{parentName:"ul"},"The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.")),(0,l.kt)("p",null,"In the previous example we learned about positional matching vs allele-specific matching. 
For genomic regions, ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position")," produce\nthe same result."),(0,l.kt)("h4",{id:"annotate-with-nirvana-2"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use the same VCF file as our previous example."),(0,l.kt)("h4",{id:"investigate-the-results-2"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 23603511,\n "refAllele": "TG",\n "altAlleles": [\n "T"\n ],\n "cytogeneticBand": "16p12.2",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA3.json.gz"},"the full JSON file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Reciprocal & Annotation 
Overlap")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For all intervals, Nirvana internally calculates two overlaps: a ",(0,l.kt)("strong",{parentName:"p"},"variant overlap")," and an ",(0,l.kt)("strong",{parentName:"p"},"annotation overlap"),". Variant overlap is the percentage of the variant's length that is\noverlapped. Annotation overlap is the percentage of the annotation's length that is overlap. "),(0,l.kt)("p",{parentName:"div"},(0,l.kt)("strong",{parentName:"p"},"Reciprocal overlap")," is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is one 1 bp, both overlaps will be pretty close to 0."))),(0,l.kt)("p",null,"We will also see this annotation for the other variant on chr16:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 68801894,\n "refAllele": "G",\n "altAlleles": [\n "A"\n ],\n "cytogeneticBand": "16q22.1",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 
2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Targeting Structural Variants")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To\nforce Nirvana to match regions only to other SVs, use the ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=sv")," option in the header."))),(0,l.kt)("h3",{id:"mixing-small-variants-and-genomic-regions"},"Mixing Small Variants and Genomic Regions"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-3"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions. Let's create a file that contains both:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 
6"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt
)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval 
#1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"<","DEL",">"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval #2")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr22"),(0,l.kt)("td",{parentName:"tr",align:"left"},"12370388"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T[chr22:12370729["),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"Known false-positive")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource4.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 4")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"REF")," field. Exception for the case listed below, this is only used by small variants or translocation breakends."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"END")," field. This is only used by genomic regions."),(0,l.kt)("li",{parentName:"ul"},"There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? Interval #2 has ",(0,l.kt)("strong",{parentName:"li"},"a symbolic allele in the ALT column"),". When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). 
When Nirvana matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-3"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file to study how matching works for intervals #1 and #2:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n21 10510818 . C . . END=10699435;SVTYPE=DUP\n22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA3.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,'The first variant is similar to the custom annotation labelled "interval #2". Position 10510818 is the padding base, so it effectively starts at position 10510819.'),(0,l.kt)("h4",{id:"investigate-the-results-3"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-26}","{11-26}":!0},' "positions": [\n {\n "chromosome": "21",\n "position": 10510818,\n "svEnd": 10699435,\n "refAllele": "C",\n "altAlleles": [\n ""\n ],\n "cytogeneticBand": "21p11.2",\n "MyDataSource": [\n {\n "start": 10510818,\n "end": 10699435,\n "notes": "Interval #1",\n "reciprocalOverlap": 0.99999,\n "annotationOverlap": 0.99999\n },\n {\n "start": 10510819,\n "end": 10699435,\n "notes": "Interval #2",\n "reciprocalOverlap": 1,\n "annotationOverlap": 1\n }\n ],\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA4.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. 
Interval #1 technically starts 1 bp earlier, so its overlap 99.9%."),(0,l.kt)("p",null,"Further down the JSON file, we find the annotated translocation breakend:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-15}","{11-15}":!0},' "variants": [\n {\n "vid": "22-12370388-T-T[chr22:12370729[",\n "chromosome": "22",\n "begin": 12370388,\n "end": 12370388,\n "isStructuralVariant": true,\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "variantType": "translocation_breakend",\n "MyDataSource": {\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "notes": "Known false-positive"\n }\n }\n')),(0,l.kt)("h2",{id:"gene-file-format"},"Gene File Format"),(0,l.kt)("h3",{id:"basic-gene-example"},"Basic Gene Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-4"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions, however, sometimes we would like to add custom gene annotations. 
The gene custom annotation file format\nlooks slightly different:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#geneSymbol"),(0,l.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,l.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TP53"),(0,l.kt)("td",{parentName:"tr",align:"left"},"7157"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colorectal cancer, hereditary nonpolyposis, type 
5"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KRAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ENSG00000133703"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mismatch repair cancer syndrome"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in cohort 123")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource5.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 2")," has the ",(0,l.kt)("inlineCode",{parentName:"li"},"geneId")," field. This can be either an ",(0,l.kt)("strong",{parentName:"li"},"Entrez Gene ID")," or an ",(0,l.kt)("strong",{parentName:"li"},"Ensembl ID"),".")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Gene Symbols")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Nirvana uses the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneId")," to match genes rather than the gene symbol. 
However, to\nmake the custom annotation files easier to read, we've included the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneSymbol")," column as well."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unknown Gene IDs")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"When Nirvana parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Nirvana. In such a case, Nirvana will display an error showing all the\nunrecognized gene IDs."))),(0,l.kt)("h4",{id:"annotate-with-nirvana-4"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a VCF file that contain variants in TP53 and KRAS:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n12 25227255 . A T . . .\n17 7675074 . C A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA4.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-4"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{24-27}","{24-27}":!0},' "genes": [\n {\n "name": "KRAS",\n "clingenGeneValidity": [\n {\n "diseaseId": "MONDO_0009026",\n "disease": "Costello syndrome",\n "classification": "disputed",\n "classificationDate": "2018-07-24"\n }\n ],\n "clingenDosageSensitivityMap": {\n "haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"\n },\n "gnomAD": {\n "pLi": 0.000788,\n "pRec": 0.789,\n "pNull": 0.21,\n "synZ": 0.336,\n "misZ": 2.32,\n "loeuf": 1.24\n },\n "MyDataSource": {\n "phenotype": "Mismatch repair cancer syndrome",\n "notes": "Seen in cohort 123"\n }\n },\n')),(0,l.kt)("p",null,"This is the abbreviated output for KRAS. Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA5.json.gz"},"the full JSON file")," if you want to see the complete KRAS entry."),(0,l.kt)("h2",{id:"customizing-the-header"},"Customizing the Header"),(0,l.kt)("h3",{id:"title"},"Title"),(0,l.kt)("p",null,"For the title, you can provide any string that hasn't already been used. 
The title should be unique."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Make sure that the title does not conflict with other keys in the JSON file."))),(0,l.kt)("p",null,"For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"vid"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"transcripts"),", etc.. The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clinvar")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"gnomad"),"."),(0,l.kt)("p",null,"For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"svLength"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"cytogeneticBand"),", etc. 
The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clingen")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"dgv"),"."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Care should be taken not to annotate using multiple custom annotations that all use the same title."))),(0,l.kt)("h3",{id:"genome-assemblies"},"Genome Assemblies"),(0,l.kt)("p",null,"The following genome assemblies can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"GRCh37"),(0,l.kt)("li",{parentName:"ul"},"GRCh38")),(0,l.kt)("h3",{id:"matching-criteria"},"Matching Criteria"),(0,l.kt)("p",null,"The matching criteria instructs how Nirvana should match a VCF variant to the custom annotation."),(0,l.kt)("p",null,"The following matching criteria can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"allele")," - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"gnomAD")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"position")," - use this when you want positional matches. 
This is commonly used with disease phenotype data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"ClinVar")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"sv")," - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline\ncopy number intervals along the genome.")),(0,l.kt)("h3",{id:"categories"},"Categories"),(0,l.kt)("p",null,"Categories are not used by Nirvana, but are often used by downstream tools. Categories provide hints for how those tools should filter or display\nthe annotation data."),(0,l.kt)("p",null,"When a category is specified, Nirvana will provide additional validation for those fields. The following table describes each category:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Category"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Validation"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele counts for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleNumber"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele numbers for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele frequencies for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations 
below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ACMG-style pathogenicity classifications"),(0,l.kt)("td",{parentName:"tr",align:"left"},"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"benign")," (B)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely benign")," (LB)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"VUS"),(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely pathogenic")," (LP)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"pathogenic")," (P)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free text that signals downstream tools to add the column to the filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 20 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free-text description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 100 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Identifier"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any ID"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 50 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"HomozygousCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"count of homozygous individuals for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Score"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any score value"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Any double-precision floating point 
number")))),(0,l.kt)("h3",{id:"descriptions"},"Descriptions"),(0,l.kt)("p",null,"Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations."),(0,l.kt)("h4",{id:"populations"},"Populations"),(0,l.kt)("p",null,"The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Super-population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ACB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African Caribbeans in Barbados")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"All populations")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ad Mixed American")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASJ"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ashkenazi Jewish")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASW"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Americans of 
African Ancestry in SW USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"BEB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Bengali from Bangladesh")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CDX"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Chinese Dai in Xishuangbanna, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CEU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Utah Residents (CEPH) with Northern and Western European Ancestry")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Han Chinese in Beijing, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Southern Han Chinese")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CLM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colombians from Medellin, Colombia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"East Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ESN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Esan in 
Nigeria")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"FIN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Finnish in Finland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GBR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"British in England and Scotland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GIH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gujarati Indian from Houston, Texas")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GWD"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gambian in Western Divisions in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"IBS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Iberian population in Spain")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ITU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Indian Telugu from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"JPT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Japanese in Tokyo, Japan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KHV"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Kinh in Ho Chi Minh City, 
Vietnam")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"LWK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Luhya in Webuye, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MAG"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mandinka in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MKK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Maasai in Kinyawa, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MSL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mende in Sierra Leone")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MXL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mexican Ancestry from Los Angeles, USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"NFE"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European (Non-Finnish)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Other")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PEL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Peruvians from Lima, Peru")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PJL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Punjabi from Lahore, 
Pakistan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Puerto Ricans from Puerto Rico")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"South Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"STU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Sri Lankan Tamil from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TSI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Toscani in Italia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"YRI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Yoruba in Ibadan, Nigeria")))),(0,l.kt)("h3",{id:"data-types"},"Data Types"),(0,l.kt)("p",null,"Each custom annotation can be one of the following data types:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"bool")," - true or false"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"number")," - any integer or floating-point number"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"string")," - text")),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 
1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For boolean variables, only keys with a ",(0,l.kt)("inlineCode",{parentName:"p"},"true")," value will be output to the JSON object."))),(0,l.kt)("h2",{id:"using-sautils"},"Using SAUtils"),(0,l.kt)("p",null,"Nirvana includes a tool called ",(0,l.kt)("inlineCode",{parentName:"p"},"SAUtils")," that converts various data sources into Nirvana's native binary format. The sub-commands ",(0,l.kt)("inlineCode",{parentName:"p"},"customvar")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"customgene")," are used to specify a variant file or a gene file respectively."),(0,l.kt)("h3",{id:"convert-variant-file"},"Convert Variant File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,l.kt)("h3",{id:"convert-gene-file"},"Convert Gene File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll 
customgene \\\n --uga Nirvana_UGA.tsv \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"--uga")," argument specifies the Nirvana universal gene archive (UGA) path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/4dda5459.3b89bb71.js b/assets/js/4dda5459.3b89bb71.js deleted file mode 100644 index 7e782dd8..00000000 --- a/assets/js/4dda5459.3b89bb71.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4958,5938],{3905:(e,n,t)=>{t.d(n,{Zo:()=>d,kt:()=>h});var a=t(67294);function r(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},d=function(e){var n=c(e.components);return a.createElement(l.Provider,{value:n},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var 
t=e.components,r=e.mdxType,o=e.originalType,l=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),u=c(t),m=r,h=u["".concat(l,".").concat(m)]||u[m]||p[m]||o;return t?a.createElement(h,i(i({ref:n},d),{},{components:t})):a.createElement(h,i({ref:n},d))}));function h(e,n){var t=arguments,r=n&&n.mdxType;if("string"==typeof e||r){var o=t.length,i=new Array(o);i[0]=m;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[u]="string"==typeof e?e:r,i[1]=s;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>s,toc:()=>l});var a=t(87462),r=(t(67294),t(3905));const o={},i=void 0,s={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.17/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.17",frontMatter:{}},l=[],c={toc:l},d="wrapper";function u(e){let{components:n,...t}=e;return(0,r.kt)(d,(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,r.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}u.isMDXComponent=!0},90815:(e,n,t)=>{t.r(n),t.d(n,{contentTitle:()=>s,default:()=>p,frontMatter:()=>i,metadata:()=>l,toc:()=>c});var a=t(87462),r=(t(67294),t(3905)),o=t(22027);const i={title:"Amino Acid Conservation"},s=void 0,l={unversionedId:"data-sources/amino-acid-conservation",id:"version-3.17/data-sources/amino-acid-conservation",title:"Amino Acid Conservation",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/amino-acid-conservation.mdx",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/amino-acid-conservation",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/amino-acid-conservation.mdx",tags:[],version:"3.17",frontMatter:{title:"Amino Acid Conservation"},sidebar:"version-3.17/docs",previous:{title:"1000 
Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/1000Genomes"},next:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"FASTA File",id:"fasta-file",children:[],level:2},{value:"Parsing FASTA",id:"parsing-fasta",children:[],level:2},{value:"Assigning scores to Nirvana transcripts",id:"assigning-scores-to-nirvana-transcripts",children:[{value:"GRCh37",id:"grch37",children:[],level:3},{value:"GRCh38",id:"grch38",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:c},u="wrapper";function p(e){let{components:n,...t}=e;return(0,r.kt)(u,(0,a.Z)({},d,t,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Amino acid conservation scores are obtained from multiple alignments of vertebrate exomes to the human ones. The score indicate the frequency with which a particular AA is observed in Humans."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. 
",(0,r.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. (",(0,r.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,r.kt)("h2",{id:"fasta-file"},"FASTA File"),(0,r.kt)("p",null,"The exon alignments are provided in FASTA files as follows:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},">ENST00000641515.2_hg38_1_2 3 0 0 chr1:65565-65573+\nMKK\n>ENST00000641515.2_panTro4_1_2 3 0 0 chrUn_GL393541:146907-146915+\nMKK\n>ENST00000641515.2_gorGor3_1_2 3 0 0\n---\n>ENST00000641515.2_ponAbe2_1_2 3 0 0 chr15:99141417-99141425-\nMKK\n>ENST00000641515.2_hg38_2_2 324 0 0 chr1:69037-70008+\nVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ\n>ENST00000641515.2_panTro4_2_2 324 0 0 chrUn_GL393541:151333-152303+\n")),(0,r.kt)("h2",{id:"parsing-fasta"},"Parsing FASTA"),(0,r.kt)("p",null,"For each Ensembl transcript, we will need to aggregate all the exons together for each of the 100 species. From there, we should get a full alignment that can be used to determine conservation. 
For example, for ENST00000641515.2 we have:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"Human (hg38) MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nChimp MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFL-MLFFVFYGGIVFGNLLIVRIVVSDSHLHSPMYFLLANLSLIDLSLCSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGorilla ----------------------------------------------------------------------------------------------------------------------\nOrangutan MKKVTAEAISWNESTSKTNNSVVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVIIVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGibbon ----------------------------------------------------------------------------------------------------------------------\nRhesus MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVVDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\nMacaque MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVIDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\n")),(0,r.kt)("p",null,"If we look at position 6, we see that humans have an Alanine (A) residue. This residue is shared by Chimp and Orangutan. However, Rhesus and Macaque have a Glutamic acid (E) residue at that position. Moreover, Gorilla and Gibbon don't even have data for that transcript.\nFor position 6, we would say that we have 43% conservation (3/7) since three organisms share the same residue as humans."),(0,r.kt)("h2",{id:"assigning-scores-to-nirvana-transcripts"},"Assigning scores to Nirvana transcripts"),(0,r.kt)("p",null,"The source FASTA file comes with Ensembl/UCSC transcript ids of the transcripts used for alignments. The Nirvana cache has RefSeq and Ensembl transcripts and our first attempt was to map the given Ensembl/UCSC ids to their equivalent RefSeq/Ensembl ids. This attempt was unsuccessful since UCSC Table Browser provided mapping without version numbers. 
So we proceeded as follows:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Take proteins which have a unique mapping (and hence one set of conservation scores). For ones that mapped to both ChrX and ChrY, we accepted the one from ChrX."),(0,r.kt)("li",{parentName:"ul"},"A Nirvana transcript having an exact peptide sequence match with a uniquely aligned protein is assigned the corresponding conservation scores.")),(0,r.kt)("p",null,"Unfortunately this left us with a very small number of transcripts having conservation scores."),(0,r.kt)("h3",{id:"grch37"},"GRCh37"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Source FASTA contained 41957 protein alignments."),(0,r.kt)("li",{parentName:"ul"},"38165 proteins had unique scores."),(0,r.kt)("li",{parentName:"ul"},"88 aligned proteins existed in Nirvana cache."),(0,r.kt)("li",{parentName:"ul"},"118 transcripts had conservation scores.")),(0,r.kt)("h3",{id:"grch38"},"GRCh38"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Source FASTA contained 110024 protein alignments."),(0,r.kt)("li",{parentName:"ul"},"88961 proteins had unique scores."),(0,r.kt)("li",{parentName:"ul"},"11688 aligned proteins existed in Nirvana cache."),(0,r.kt)("li",{parentName:"ul"},"12098 transcripts had conservation scores.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,"GRCh37: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,r.kt)("p",null,"GRCh38: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Conservation scores are reported in the transcript section. 
One score is reported for each alt allele"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/4ff3dfce.06302886.js b/assets/js/4ff3dfce.06302886.js deleted file mode 100644 index 73f085bf..00000000 --- a/assets/js/4ff3dfce.06302886.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3786],{3905:(n,e,t)=>{t.d(e,{Zo:()=>d,kt:()=>u});var a=t(67294);function i(n,e,t){return e in n?Object.defineProperty(n,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):n[e]=t,n}function o(n,e){var t=Object.keys(n);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(n);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(n,e).enumerable}))),t.push.apply(t,a)}return t}function r(n){for(var e=1;e=0||(i[t]=n[t]);return i}(n,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(n);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(n,t)&&(i[t]=n[t])}return i}var c=a.createContext({}),l=function(n){var e=a.useContext(c),t=e;return n&&(t="function"==typeof n?n(e):r(r({},e),n)),t},d=function(n){var e=l(n.components);return a.createElement(c.Provider,{value:e},n.children)},p="mdxType",g={inlineCode:"code",wrapper:function(n){var e=n.children;return a.createElement(a.Fragment,{},e)}},m=a.forwardRef((function(n,e){var t=n.components,i=n.mdxType,o=n.originalType,c=n.parentName,d=s(n,["components","mdxType","originalType","parentName"]),p=l(t),m=i,u=p["".concat(c,".").concat(m)]||p[m]||g[m]||o;return t?a.createElement(u,r(r({ref:e},d),{},{components:t})):a.createElement(u,r({ref:e},d))}));function u(n,e){var t=arguments,i=e&&e.mdxType;if("string"==typeof n||i){var o=t.length,r=new Array(o);r[0]=m;var s={};for(var c in e)hasOwnProperty.call(e,c)&&(s[c]=e[c]);s.originalType=n,s[p]="string"==typeof n?n:i,r[1]=s;for(var 
l=2;l{t.r(e),t.d(e,{contentTitle:()=>r,default:()=>p,frontMatter:()=>o,metadata:()=>s,toc:()=>c});var a=t(87462),i=(t(67294),t(3905));const o={title:"Parsing Nirvana JSON"},r=void 0,s={unversionedId:"introduction/parsing-json",id:"version-3.17/introduction/parsing-json",title:"Parsing Nirvana JSON",description:"Why JSON?",source:"@site/versioned_docs/version-3.17/introduction/parsing-json.md",sourceDirName:"introduction",slug:"/introduction/parsing-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/parsing-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/introduction/parsing-json.md",tags:[],version:"3.17",frontMatter:{title:"Parsing Nirvana JSON"},sidebar:"version-3.17/docs",previous:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/getting-started"},next:{title:"Annotating COVID-19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/covid19"}},c=[{value:"Why JSON?",id:"why-json",children:[{value:"What do other annotators use?",id:"what-do-other-annotators-use",children:[],level:3},{value:"What do we gain by using JSON?",id:"what-do-we-gain-by-using-json",children:[],level:3}],level:2},{value:"Parsing JSON",id:"parsing-json",children:[{value:"Organization",id:"organization",children:[],level:3},{value:"JASIX",id:"jasix",children:[],level:3}],level:2}],l={toc:c},d="wrapper";function p(n){let{components:e,...o}=n;return(0,i.kt)(d,(0,a.Z)({},l,o,{components:e,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"why-json"},"Why JSON?"),(0,i.kt)("p",null,"VCF is a fantastic file format that was developed during the methods development activities within the 1000 Genomes Project. Prior to that, variant callers were outputting information into a variety of tab-delimited formats. Sometimes based on existing standards (like GFF), while most were proprietary. 
The primary intent of VCF files was to provide a human-readable, standardized representation of genetic variants. Similar to SAM/BAM files, VCF files used BCF files as their binary counterpart."),(0,i.kt)("p",null,"In the very beginning, Nirvana offered VCF output for annotation. While many variant annotators offer an option to output VCF files, one could argue if they are still human-readable. Here's an example from a VCF file produced by VEP v102:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"chr3 107840527 . A ATTTTTTTTT,AT,ATTTTTTTT 153.51 PASS AN=6;MQ=244.10;\nSOR=1.739;QD=2.24;DP=57;AF=0.500,0.167,0.333;FS=0.000;AC=3,1,2;CSQ=TTTTTTTTT|\nintron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|\nTranscript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-132_622-124dup|||||||\nrs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||\n|||||||||0.792|-0.109757,T|intron_variant&non_coding_transcript_variant|MODIFIER|\nLINC00635|ENSG00000241469|Transcript|ENST00000608506.6|lncRNA||4/4|\nENST00000608506.6:n.622-124dup|||||||rs35564779||-1||HGNC|HGNC:27184|||5|||||||||\nEnsembl||||||||||||||||||||||||||||||||||||||||||||0.932|-0.075622,TTTTTTTT|\nintron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|\nTranscript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-131_622-124dup|||||||\nrs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||\n|||||||||0.808|-0.105490,TTTTTTTTT|intron_variant&non_coding_transcript_variant|\nMODIFIER|LINC00636|ENSG00000240423|Transcript|ENST00000649048.1|lncRNA||2/3|\nENST00000649048.1:n.179+5223_179+5231dup|||||||rs35564779||1||HGNC|HGNC:27702|||||||||\n|||Ensembl||||||||||||||||||||||||||||||||||||||||||||0.792|-0.109757, (etc.)\n")),(0,i.kt)("p",null,"Originally Nirvana used the same VCF notation as VEP uses above. 
The problem is that you end up with a large amount of text that is difficult to parse out by eye and requires the use of several delimiters to divide the information into useful segments. When we originally annotated this variant using VEP, ",(0,i.kt)("strong",{parentName:"p"},"this single variant used 488,909 bytes")," (almost \xbd MB). Surprisingly, we found that this broke some downstream tools that had preconceived notions of how long a single line could be in a VCF file."),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Whitespace is not allowed in the VCF INFO field. This means that if you wanted to express a gene description from OMIM: ",(0,i.kt)("strong",{parentName:"p"},'"HRAS PROTOONCOGENE, GTPase; HRAS"'),", you would need to replace the spaces with something else like an underline. You would also need to hope that the VCF parser correctly handles embedded commas and semicolons in the description."))),(0,i.kt)("h3",{id:"what-do-other-annotators-use"},"What do other annotators use?"),(0,i.kt)("p",null,"Unfortunately, file format standardization has not made it all the way to variant annotation yet. 
The ",(0,i.kt)("a",{parentName:"p",href:"https://ga4gh-gks.github.io/variant_annotation.html"},"GA4GH Annotation group")," had many discussions on the topic several years ago. While a set of JSON schemas were created in that effort, there wasn't enough momentum to make this a new standard."),(0,i.kt)("p",null,"While there is some overlap in general file formats (JSON vs VCF vs TSV), none of those are compatible with each other. I.e. the VCF representation in VEP and snpEff is different just like the JSON schemas used by VEP, Nirvana, and GA4GH are different."),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Source"),(0,i.kt)("th",{parentName:"tr",align:null},"Formats"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"VEP"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"),", TSV, VCF")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"snpEff"),(0,i.kt)("td",{parentName:"tr",align:null},"VCF")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"Annovar"),(0,i.kt)("td",{parentName:"tr",align:null},"TSV")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"Nirvana"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"GA4GH"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"))))),(0,i.kt)("p",null,"We are interested in working together with others in the annotation space to develop a common annotation file format. 
Our belief is that this would accelerate methods development and benchmarking activities within annotation much in the same way the creation of SAM/BAM & VCF/BCF accelerated secondary analysis development."),(0,i.kt)("h3",{id:"what-do-we-gain-by-using-json"},"What do we gain by using JSON?"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"JSON files are better at showing hierarchical and other relational data. For example when we output ClinVar data, we often want to output several overlapping RCV entries (variants coupled with a disease phenotype). In each, we would want to output a list of phenotypes, clinical significance, etc. That is difficult to accomplish in a human-readable way using VCF files (without resorting to growing lexicon of delimiters)."),(0,i.kt)("li",{parentName:"ul"},"JSON files use JavaScript data types, while VCF INFO fields don't directly have data types. Instead, external metadata located in the VCF header is required to indicated the preferred data type."),(0,i.kt)("li",{parentName:"ul"},"JSON files are more verbose. Often this is seen as a negative, but compression largely compensates for this. Given the following excerpt from the VCF example above ",(0,i.kt)("inlineCode",{parentName:"li"},"HGNC:27184|||5|||||||||Ensembl")," it's not immediately obvious what the ",(0,i.kt)("inlineCode",{parentName:"li"},"5")," refers to (without checking the VCF header for details). 
With JSON files, you would always see a key name associated with a value."),(0,i.kt)("li",{parentName:"ul"},"JSON files can be natively imported into different search and analytics solutions like Elasticsearch and Snowflake."),(0,i.kt)("li",{parentName:"ul"},"JSON strings do not have any limitations on the use of whitespace.")),(0,i.kt)("h2",{id:"parsing-json"},"Parsing JSON"),(0,i.kt)("p",null,"Our JSON files are organized similarly to original VCF variants:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(65709).Z})),(0,i.kt)("p",null,"Nirvana JSON files can get very large and sometimes we receive feedback that a bioinformatician tried to read the JSON file into Python or R resulting in a program that ran out of available RAM. This happens because those parsers try to load everything into memory all at once."),(0,i.kt)("p",null,"To get around those issues, we play some clever tricks with newlines that enables our users to parse our JSON files quickly and efficiently."),(0,i.kt)("h3",{id:"organization"},"Organization"),(0,i.kt)("p",null,"Our JSON file is arranged as follows:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the header section is located on the first line"),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a position (same as a row in a VCF file)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the genes section ",(0,i.kt)("inlineCode",{parentName:"li"},'],"genes":[')))),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a gene",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the end ",(0,i.kt)("inlineCode",{parentName:"li"},"]}"))))),(0,i.kt)("p",null,"Knowing this, you can load each position line as an independent JSON object and extract the information you need. 
"),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Jupyter Notebook")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"To demonstrate this, we have put together a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-python.ipynb"},"Jupyter notebook demonstrating how to do this in Python")," and a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-r.ipynb"},"R version")," as well."))),(0,i.kt)("h3",{id:"jasix"},"JASIX"),(0,i.kt)("p",null,"One of the tools that we really like in the VCF ecosystem is ",(0,i.kt)("a",{parentName:"p",href:"https://dx.doi.org/10.1093%2Fbioinformatics%2Fbtq671"},"tabix"),". Unfortunately, tabix only works for tab-delimited file formats. 
As a result, we created a similar tool for Nirvana JSON files called JASIX."),(0,i.kt)("p",null,"Here's an example of how you might use JASIX:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/Jasix.dll -i dragen.json.gz -q chr1:942450-942455\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the Nirvana JSON path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-q")," argument specifies a genomic range ",(0,i.kt)("em",{parentName:"li"},"(you can use as many of these as you want)"))),(0,i.kt)("p",null,"JASIX also includes additional options for showing the Nirvana header or for extracting different sections (like the genes section)."),(0,i.kt)("p",null,"The output from JASIX is compliant JSON object shown in pretty-printed form:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{"positions":[\n{\n "chromosome": "chr1",\n "position": 942451,\n "refAllele": "T",\n "altAlleles": [\n "C"\n ],\n "quality": 484.23,\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "1p36.33",\n "samples": [\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 21,\n "genotypeQuality": 60,\n "alleleDepths": [\n 0,\n 21\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 32,\n "genotypeQuality": 93,\n "alleleDepths": [\n 0,\n 32\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 36,\n "genotypeQuality": 105,\n "alleleDepths": [\n 0,\n 36\n ]\n }\n ],\n "variants": [\n {\n "vid": "1-942451-T-C",\n "chromosome": "chr1",\n "begin": 942451,\n "end": 942451,\n "refAllele": "T",\n "altAllele": "C",\n "variantType": "SNV",\n "hgvsg": "NC_000001.11:g.942451T>C",\n "phylopScore": -0.1,\n "clinvar": [\n {\n "id": "VCV000836156.1",\n "reviewStatus": "criteria provided, single submitter",\n "significance": 
[\n "uncertain significance"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "lastUpdatedDate": "2020-08-20"\n },\n {\n "id": "RCV001037211.1",\n "variationId": 836156,\n "reviewStatus": "criteria provided, single submitter",\n "alleleOrigins": [\n "germline"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "phenotypes": [\n "not provided"\n ],\n "medGenIds": [\n "CN517202"\n ],\n "significance": [\n "uncertain significance"\n ],\n "lastUpdatedDate": "2020-08-20",\n "pubMedIds": [\n "28492532"\n ]\n }\n ],\n "dbsnp": [\n "rs6672356"\n ],\n "gnomad": {\n "coverage": 25,\n "allAf": 0.999855,\n "allAn": 123742,\n "allAc": 123724,\n "allHc": 61853,\n "afrAf": 0.999416,\n "afrAn": 10278,\n "afrAc": 10272,\n "afrHc": 5133,\n "amrAf": 0.99995,\n "amrAn": 20008,\n "amrAc": 20007,\n "amrHc": 10003,\n "easAf": 1,\n "easAn": 6054,\n "easAc": 6054,\n "easHc": 3027,\n "finAf": 1,\n "finAn": 8696,\n "finAc": 8696,\n "finHc": 4348,\n "nfeAf": 0.999899,\n "nfeAn": 49590,\n "nfeAc": 49585,\n "nfeHc": 24790,\n "asjAf": 1,\n "asjAn": 7208,\n "asjAc": 7208,\n "asjHc": 3604,\n "sasAf": 0.99967,\n "sasAn": 18160,\n "sasAc": 18154,\n "sasHc": 9074,\n "othAf": 1,\n "othAn": 3748,\n "othAc": 3748,\n "othHc": 1874,\n "maleAf": 0.9999,\n "maleAn": 69780,\n "maleAc": 69773,\n "maleHc": 34883,\n "femaleAf": 0.999796,\n "femaleAn": 53962,\n "femaleAc": 53951,\n "femaleHc": 26970,\n "controlsAllAf": 0.999815,\n "controlsAllAn": 48654,\n "controlsAllAc": 48645\n },\n "oneKg": {\n "allAf": 1,\n "afrAf": 1,\n "amrAf": 1,\n "easAf": 1,\n "eurAf": 1,\n "sasAf": 1,\n "allAn": 5008,\n "afrAn": 1322,\n "amrAn": 694,\n "easAn": 1008,\n "eurAn": 1006,\n "sasAn": 978,\n "allAc": 5008,\n "afrAc": 1322,\n "amrAc": 694,\n "easAc": 1008,\n "eurAc": 1006,\n "sasAc": 978\n },\n "primateAI": [\n {\n "hgnc": "SAMD11",\n "scorePercentile": 0.87\n }\n ],\n "revel": {\n "score": 0.145\n },\n "topmed": {\n "allAf": 0.999809,\n "allAn": 125568,\n "allAc": 125544,\n "allHc": 62760\n },\n "transcripts": [\n {\n 
"transcript": "ENST00000420190.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ],\n "proteinId": "ENSP00000411579.2"\n },\n {\n "transcript": "ENST00000342066.7",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000342066.7:c.1027T>C",\n "hgvsp": "ENSP00000342313.3:p.(Trp343Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000342313.3",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000618181.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "732",\n "cdsPos": "652",\n "exons": "7/11",\n "proteinPos": "218",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618181.4:c.652T>C",\n "hgvsp": "ENSP00000480870.1:p.(Trp218Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000480870.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000622503.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1030",\n "exons": "10/14",\n "proteinPos": "344",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000622503.4:c.1030T>C",\n "hgvsp": "ENSP00000482138.1:p.(Trp344Arg)",\n "isCanonical": true,\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482138.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000618323.4",\n "source": "Ensembl",\n "bioType": 
"protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "712",\n "cdsPos": "632",\n "exons": "8/12",\n "proteinPos": "211",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618323.4:c.632T>C",\n "hgvsp": "ENSP00000480678.1:p.(Leu211Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000480678.1",\n "siftScore": 0.03,\n "siftPrediction": "deleterious - low confidence"\n },\n {\n "transcript": "ENST00000616016.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "ccT/ccC",\n "aminoAcids": "P",\n "cdnaPos": "944",\n "cdsPos": "864",\n "exons": "9/13",\n "proteinPos": "288",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "ENST00000616016.4:c.864T>C",\n "hgvsp": "ENST00000616016.4:c.864T>C(p.(Pro288=))",\n "proteinId": "ENSP00000478421.1"\n },\n {\n "transcript": "ENST00000618779.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "921",\n "cdsPos": "841",\n "exons": "9/13",\n "proteinPos": "281",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618779.4:c.841T>C",\n "hgvsp": "ENSP00000484256.1:p.(Trp281Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000484256.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000616125.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "783",\n "cdsPos": "703",\n "exons": "8/12",\n "proteinPos": "235",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000616125.4:c.703T>C",\n "hgvsp": "ENSP00000484643.1:p.(Trp235Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": 
"ENSP00000484643.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000620200.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "427",\n "cdsPos": "347",\n "exons": "5/9",\n "proteinPos": "116",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000620200.4:c.347T>C",\n "hgvsp": "ENSP00000484820.1:p.(Leu116Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000484820.1",\n "siftScore": 0.16,\n "siftPrediction": "tolerated - low confidence"\n },\n {\n "transcript": "ENST00000617307.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "867",\n "cdsPos": "787",\n "exons": "9/13",\n "proteinPos": "263",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000617307.4:c.787T>C",\n "hgvsp": "ENSP00000482090.1:p.(Trp263Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482090.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "NM_152486.2",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "codons": "Cgg/Cgg",\n "aminoAcids": "R",\n "cdnaPos": "1107",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "148398",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "NM_152486.2:c.1027T>C",\n "hgvsp": "NM_152486.2:c.1027T>C(p.(Arg343=))",\n "isCanonical": true,\n "proteinId": "NP_689699.2"\n },\n {\n "transcript": "ENST00000341065.8",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "750",\n "cdsPos": "751",\n "exons": "8/12",\n "proteinPos": "251",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": 
"ENST00000341065.8:c.750T>C",\n "hgvsp": "ENSP00000349216.4:p.(Trp251Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000349216.4",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000455979.1",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "507",\n "cdsPos": "508",\n "exons": "4/7",\n "proteinPos": "170",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000455979.1:c.507T>C",\n "hgvsp": "ENSP00000412228.1:p.(Trp170Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000412228.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000478729.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000474461.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "389",\n "exons": "3/4",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000474461.1:n.389T>C"\n },\n {\n "transcript": "ENST00000466827.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "191",\n "exons": "2/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000466827.1:n.191T>C"\n },\n {\n "transcript": "ENST00000464948.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "286",\n "exons": "1/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000464948.1:n.286T>C"\n },\n {\n "transcript": "NM_015658.3",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "geneId": "26155",\n "hgnc": "NOC2L",\n 
"consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "NP_056473.2"\n },\n {\n "transcript": "ENST00000483767.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000327044.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000317992.6"\n },\n {\n "transcript": "ENST00000477976.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000496938.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n }\n ]\n }\n ]\n}\n]}\n')))}p.isMDXComponent=!0},65709:(n,e,t)=>{t.d(e,{Z:()=>a});const a=t.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/5062e356.54735591.js b/assets/js/5062e356.54735591.js deleted file mode 100644 index 87f2d37d..00000000 --- a/assets/js/5062e356.54735591.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1888,880],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>h});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},u=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),c=d(n),m=r,h=c["".concat(s,".").concat(m)]||c[m]||p[m]||o;return n?a.createElement(h,i(i({ref:t},u),{},{components:n})):a.createElement(h,i({ref:t},u))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/gerp-json",id:"version-3.18/data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gerp-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],d={toc:s},u="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 
1.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to +\u221e")))))}c.isMDXComponent=!0},84614:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(61201);const i={title:"GERP"},l=void 0,s={unversionedId:"data-sources/gerp",id:"version-3.18/data-sources/gerp",title:"GERP",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/gerp.mdx",sourceDirName:"data-sources",slug:"/data-sources/gerp",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gerp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gerp.mdx",tags:[],version:"3.18",frontMatter:{title:"GERP"},sidebar:"docs",previous:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/fusioncatcher"},next:{title:"GME Variome",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gme"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Source Files",id:"source-files",children:[{value:"Example GRCh37",id:"example-grch37",children:[],level:3},{value:"Example GRCh38",id:"example-grch38",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[{value:"GRCh37",id:"grch37",children:[],level:3},{value:"GRCh38",id:"grch38",children:[],level:3}],level:2},{value:"JSON 
Output",id:"json-output",children:[],level:2}],u={toc:d},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"GERP identifies constrained elements in multiple alignments by quantifying substitution deficits.\nThese deficits represent substitutions that would have occurred if the element were neutral DNA, but did not occur because the element has been under functional constraint (Rejected Substitutions).\nNirvana uses GERP++ which is based on a significantly faster and more statistically robust maximum likelihood estimation procedure to compute expected rates of evolution."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},'Davydov, Eugene V., et al. "Identifying a high fraction of the human genome to be under selective constraint using GERP++." ',(0,r.kt)("em",{parentName:"p"},"PLoS computational biology")," ",(0,r.kt)("strong",{parentName:"p"},"6.12")," e1001025 (2010). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1371/journal.pcbi.1001025"},"https://doi.org/10.1371/journal.pcbi.1001025")))),(0,r.kt)("h2",{id:"source-files"},"Source Files"),(0,r.kt)("h3",{id:"example-grch37"},"Example GRCh37"),(0,r.kt)("p",null,"GRCh37 file is a TSV format"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr position GERP\n1 12177 0.83\n1 12178 -0.206\n1 12179 -0.492\n1 12180 -1.66\n1 12181 0.83\n1 12182 0.83\n1 12183 -0.417\n1 12184 0.83\n")),(0,r.kt)("h3",{id:"example-grch38"},"Example GRCh38"),(0,r.kt)("p",null,"GRCh38 file is a lift-over BED format"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr pos_start pos_end GERP\n1 12646 12647 0.298\n1 12647 12648 2.63\n1 12648 12649 1.87\n1 12649 12650 0.252\n1 12650 12651 -2.06\n1 12651 12652 2.61\n1 12652 12653 3.97\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we are interested in columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"position")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"GERP"))),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("p",null,"None"),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("h3",{id:"grch37"},"GRCh37"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html"},"http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html")),(0,r.kt)("h3",{id:"grch38"},"GRCh38"),(0,r.kt)("p",null,"The data is not available for GRCh38 on GERP++ website, and was obtained from ",(0,r.kt)("a",{parentName:"p",href:"https://personal.broadinstitute.org/konradk/loftee_data/GRCh38/"},"https://personal.broadinstitute.org/konradk/loftee_data/GRCh38/")),(0,r.kt)("h2",{id:"json-output"},"JSON 
Output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/51c1c517.05550088.js b/assets/js/51c1c517.05550088.js deleted file mode 100644 index 4f07e546..00000000 --- a/assets/js/51c1c517.05550088.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1946],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function s(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),l=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):s(s({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(i,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,s(s({ref:t},p),{},{components:n})):r.createElement(f,s({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,s=new Array(o);s[0]=m;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:a,s[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var 
r=n(87462),a=(n(67294),n(3905));const o={},s=void 0,c={unversionedId:"data-sources/dbsnp-json",id:"version-3.14/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/dbsnp-json.md",tags:[],version:"3.14",frontMatter:{}},i=[],l={toc:i},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/51e9fa1e.d53a3e94.js b/assets/js/51e9fa1e.d53a3e94.js deleted file mode 100644 index ee8fb505..00000000 --- a/assets/js/51e9fa1e.d53a3e94.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5348],{3905:(e,n,t)=>{t.d(n,{Zo:()=>u,kt:()=>h});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var 
a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function r(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):r(r({},n),e)),t},u=function(e){var n=c(e.components);return a.createElement(s.Provider,{value:n},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),p=c(t),m=i,h=p["".concat(s,".").concat(m)]||p[m]||d[m]||o;return t?a.createElement(h,r(r({ref:n},u),{},{components:t})):a.createElement(h,r({ref:n},u))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var o=t.length,r=new Array(o);r[0]=m;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[p]="string"==typeof e?e:i,r[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>r,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=t(87462),i=(t(67294),t(3905));const o={title:"Annotating COVID-19"},r=void 0,l={unversionedId:"introduction/covid19",id:"version-3.21/introduction/covid19",title:"Annotating COVID-19",description:"The Nirvana development team is mainly focused on providing annotations for the human genome. 
This focus allows us to maximize our resources towards understanding human health.",source:"@site/versioned_docs/version-3.21/introduction/covid19.md",sourceDirName:"introduction",slug:"/introduction/covid19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/covid19",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/introduction/covid19.md",tags:[],version:"3.21",frontMatter:{title:"Annotating COVID-19"},sidebar:"docs",previous:{title:"Parsing Nirvana JSON",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/parsing-json"},next:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/1000Genomes"}},s=[{value:"Getting Nirvana",id:"getting-nirvana",children:[],level:2},{value:"Downloading the COVID-19 data files",id:"downloading-the-covid-19-data-files",children:[],level:2},{value:"Download a COVID-19 VCF file",id:"download-a-covid-19-vcf-file",children:[],level:2},{value:"Running Nirvana",id:"running-nirvana",children:[],level:2},{value:"Investigating the Results",id:"investigating-the-results",children:[],level:2}],c={toc:s},u="wrapper";function p(e){let{components:n,...t}=e;return(0,i.kt)(u,(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health."),(0,i.kt)("p",null,"However, nothing in our architecture prevents us from supporting other genomes. 
Earlier this year, we had an opportunity to put that statement to the test - we added support for annotating the ",(0,i.kt)("strong",{parentName:"p"},"SARS-CoV-2")," genome, the virus that causes the ",(0,i.kt)("strong",{parentName:"p"},"COVID-19")," disease."),(0,i.kt)("p",null,"In addition to normal transcript annotation, we also supply:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"allele frequencies"),(0,i.kt)("li",{parentName:"ul"},"protein domains")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"SARS-CoV-2 Galaxy Project")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The allele frequencies used by Nirvana were provided by the ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/galaxyproject/SARS-CoV-2"},"SARS-CoV-2 Galaxy Project"),". 
This is an international effort that provides ongoing analysis of COVID-19 using Galaxy, BioConda, and public research infrastructures."))),(0,i.kt)("h2",{id:"getting-nirvana"},"Getting Nirvana"),(0,i.kt)("p",null,"If you don't have Nirvana already, please consult our ",(0,i.kt)("a",{parentName:"p",href:"getting-started"},"Getting Started")," page first."),(0,i.kt)("h2",{id:"downloading-the-covid-19-data-files"},"Downloading the COVID-19 data files"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip"},"a data zip file")," containing new gene models, reference, and external data sources for SARS-CoV-2:"),(0,i.kt)("p",null,"Just go to the directory that contains your Nirvana ",(0,i.kt)("inlineCode",{parentName:"p"},"Data")," directory."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"cd ~/Nirvana\ncurl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip\nunzip Covid19Data.zip\n")),(0,i.kt)("h2",{id:"download-a-covid-19-vcf-file"},"Download a COVID-19 VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz"},"a COVID-19 VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz\n")),(0,i.kt)("h2",{id:"running-nirvana"},"Running Nirvana"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/Nirvana.dll \\\n -c Data/Cache/SARS-CoV-2/SARS-CoV-2 \\\n --sd Data/SupplementaryAnnotation/SARS-CoV-2 \\\n -r Data/References/SARS-CoV-2.ASM985889v3.dat \\\n -i Covid19Mutations.vcf.gz \\\n -o 
Covid19Mutations\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache prefix"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:00.0\nSA Position Scan 00:00:00.0 1763\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nNC_045512 00:00:00.0 00:00:00.1 173\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:00.0 2.0 %\nPreload 00:00:00.0 0.3 %\nAnnotation 00:00:00.1 6.0 %\n\nTime: 00:00:01.5\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"Covid19Mutations.json.gz"),". 
Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.json.gz"},"the full JSON file"),"."),(0,i.kt)("h2",{id:"investigating-the-results"},"Investigating the Results"),(0,i.kt)("p",null,"Here's an example of what a COVID-19 variant looks like in the JSON output:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "chromosome":"NC_045512.2",\n "position":27323,\n "refAllele":"C",\n "altAlleles":[\n "T"\n ],\n "filters":[\n "PASS"\n ],\n "proteinDomains":[\n {\n "start":27202,\n "end":27384,\n "proteinId":"YP_009724394.1",\n "domainId":"cl13556",\n "domainName":"Sars6 super family",\n "reciprocalOverlap":0.00546,\n "annotationOverlap":0.00546\n }\n ],\n "variants":[\n {\n "vid":"NC_045512.2-27323-C-T",\n "chromosome":"NC_045512.2",\n "begin":27323,\n "end":27323,\n "refAllele":"C",\n "altAllele":"T",\n "variantType":"SNV",\n "hgvsg":"NC_045512.2:g.27323C>T",\n "alleleFrequency":{\n "refAllele":"C",\n "altAllele":"T",\n "allAc":8,\n "allAn":1058,\n "allAf":0.007561\n },\n "transcripts":[\n {\n "transcript":"YP_009724394.1",\n "source":"RefSeq",\n "bioType":"protein_coding",\n "codons":"tCt/tTt",\n "aminoAcids":"S/F",\n "cdnaPos":"122",\n "cdsPos":"122",\n "exons":"1/1",\n "proteinPos":"41",\n "geneId":"43740572",\n "hgnc":"ORF6",\n "consequence":[\n "missense_variant"\n ],\n "hgvsc":"YP_009724394.1:c.122C>T",\n "hgvsp":"YP_009724394.1:p.(Ser41Phe)",\n "proteinId":"YP_009724394.1"\n },\n {\n "transcript":"YP_009724395.1",\n "source":"RefSeq",\n "bioType":"protein_coding",\n "geneId":"43740573",\n "hgnc":"ORF7a",\n "consequence":[\n "upstream_gene_variant"\n ],\n "proteinId":"YP_009724395.1"\n }\n ]\n }\n ]\n}\n')))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/51ec9460.885958e4.js b/assets/js/51ec9460.885958e4.js deleted file mode 100644 index e62d9f16..00000000 --- a/assets/js/51ec9460.885958e4.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5697,4246],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>f});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=d(n),m=r,f=p["".concat(s,".").concat(m)]||p[m]||u[m]||o;return n?a.createElement(f,i(i({ref:t},c),{},{components:n})):a.createElement(f,i({ref:t},c))}));function f(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/topmed-json",id:"data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/topmed-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Illumina Connected Annotations)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}p.isMDXComponent=!0},26891:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(49819);const i={title:"TOPMed"},l=void 0,s={unversionedId:"data-sources/topmed",id:"data-sources/topmed",title:"TOPMed",description:"Overview",source:"@site/docs/data-sources/topmed.mdx",sourceDirName:"data-sources",slug:"/data-sources/topmed",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/topmed.mdx",tags:[],version:"current",frontMatter:{title:"TOPMed"},sidebar:"docs",previous:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai"},next:{title:"Illumina Connected Annotations JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF extraction",id:"vcf-extraction",children:[],level:2},{value:"GRCh37 liftover",id:"grch37-liftover",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON 
output",id:"json-output",children:[],level:2}],c={toc:d},p="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"https://www.nhlbi.nih.gov/science/trans-omics-precision-medicine-topmed-program"},"Trans-Omics for Precision Medicine")," (TOPMed) program, sponsored by the National Institutes of Health (NIH) National Heart, Lung and Blood Institute (NHLBI), is part of a broader Precision Medicine Initiative, which aims to provide disease treatments tailored to an individual\u2019s unique genes and environment. TOPMed contributes to this Initiative through the integration of whole-genome sequencing (WGS) and other omics (e.g., metabolic profiles, epigenomics, protein and RNA expression patterns) data with molecular, behavioral, imaging, environmental, and clinical data."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Kowalski, M.H., Qian, H., Hou, Z., Rosen, J.D., Tapia, A.L., Shan, Y., Jain, D., Argos, M., Arnett, D.K., Avery, C. and Barnes, K.C., 2019. 
Use of> 100,000 NHLBI Trans-Omics for Precision Medicine (TOPMed) Consortium whole genome sequences improves imputation quality and detection of rare variant associations in admixed African and Hispanic/Latino populations. ",(0,r.kt)("em",{parentName:"p"},"PLoS genetics"),", ",(0,r.kt)("strong",{parentName:"p"},"15(12)"),", p.e1008500."))),(0,r.kt)("h2",{id:"vcf-extraction"},"VCF extraction"),(0,r.kt)("p",null,"We currently extract the following fields from TOPMed VCF file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,r.kt)("p",null,"Example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 10132 TOPMed_freeze_5?chr1:10,132 T C 255 SVM VRT=1;NS=62784;AN=125568;AC=32;AF=0.000254842;Het=32;Hom=0 NA:FRQ 125568:0.000254842\n")),(0,r.kt)("h2",{id:"grch37-liftover"},"GRCh37 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh37 on TOPMed website. We performed a liftover from GRCh38 to GRCh37 using dbSNP ids."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://bravo.sph.umich.edu/freeze5/hg38/download"},"https://bravo.sph.umich.edu/freeze5/hg38/download")),(0,r.kt)("h2",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/51ec9460.a8efbe38.js b/assets/js/51ec9460.a8efbe38.js new file mode 100644 index 00000000..ba3452a9 --- /dev/null +++ b/assets/js/51ec9460.a8efbe38.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5697,4246],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>f});var a=n(7294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var 
a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=d(n),m=r,f=p["".concat(s,".").concat(m)]||p[m]||u[m]||o;return n?a.createElement(f,i(i({ref:t},c),{},{components:n})):a.createElement(f,i({ref:t},c))}));function f(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(7462),r=(n(7294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/topmed-json",id:"data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/topmed-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},c="wrapper";function 
p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Illumina Connected Annotations)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}p.isMDXComponent=!0},6891:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(7462),r=(n(7294),n(3905)),o=n(9819);const i={title:"TOPMed"},l=void 
0,s={unversionedId:"data-sources/topmed",id:"data-sources/topmed",title:"TOPMed",description:"Overview",source:"@site/docs/data-sources/topmed.mdx",sourceDirName:"data-sources",slug:"/data-sources/topmed",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/topmed.mdx",tags:[],version:"current",frontMatter:{title:"TOPMed"},sidebar:"docs",previous:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai"},next:{title:"Illumina Connected Annotations JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF extraction",id:"vcf-extraction",children:[],level:2},{value:"GRCh37 liftover",id:"grch37-liftover",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON output",id:"json-output",children:[],level:2}],c={toc:d},p="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"https://www.nhlbi.nih.gov/science/trans-omics-precision-medicine-topmed-program"},"Trans-Omics for Precision Medicine")," (TOPMed) program, sponsored by the National Institutes of Health (NIH) National Heart, Lung and Blood Institute (NHLBI), is part of a broader Precision Medicine Initiative, which aims to provide disease treatments tailored to an individual\u2019s unique genes and environment. 
TOPMed contributes to this Initiative through the integration of whole-genome sequencing (WGS) and other omics (e.g., metabolic profiles, epigenomics, protein and RNA expression patterns) data with molecular, behavioral, imaging, environmental, and clinical data."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Kowalski, M.H., Qian, H., Hou, Z., Rosen, J.D., Tapia, A.L., Shan, Y., Jain, D., Argos, M., Arnett, D.K., Avery, C. and Barnes, K.C., 2019. Use of> 100,000 NHLBI Trans-Omics for Precision Medicine (TOPMed) Consortium whole genome sequences improves imputation quality and detection of rare variant associations in admixed African and Hispanic/Latino populations. 
",(0,r.kt)("em",{parentName:"p"},"PLoS genetics"),", ",(0,r.kt)("strong",{parentName:"p"},"15(12)"),", p.e1008500."))),(0,r.kt)("h2",{id:"vcf-extraction"},"VCF extraction"),(0,r.kt)("p",null,"We currently extract the following fields from TOPMed VCF file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,r.kt)("p",null,"Example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 10132 TOPMed_freeze_5?chr1:10,132 T C 255 SVM VRT=1;NS=62784;AN=125568;AC=32;AF=0.000254842;Het=32;Hom=0 NA:FRQ 125568:0.000254842\n")),(0,r.kt)("h2",{id:"grch37-liftover"},"GRCh37 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh37 on TOPMed website. We performed a liftover from GRCh38 to GRCh37 using dbSNP ids."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://bravo.sph.umich.edu/freeze5/hg38/download"},"https://bravo.sph.umich.edu/freeze5/hg38/download")),(0,r.kt)("h2",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5357ef3e.c55d3d97.js b/assets/js/5357ef3e.c55d3d97.js deleted file mode 100644 index 066aa283..00000000 --- a/assets/js/5357ef3e.c55d3d97.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7370,5146],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>D});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var 
i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=s(n),u=r,D=d["".concat(p,".").concat(u)]||d[u]||m[u]||i;return n?a.createElement(D,o(o({ref:t},c),{},{components:n})):a.createElement(D,o({ref:t},c))}));function D(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in t)hasOwnProperty.call(t,p)&&(l[p]=t[p]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/splice-ai-json",id:"version-3.17/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/splice-ai-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],s={toc:p},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n 
"acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}d.isMDXComponent=!0},86831:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>p,toc:()=>s});var a=n(87462),r=(n(67294),n(3905)),i=n(52629);const o={title:"Splice AI"},l=void 0,p={unversionedId:"data-sources/splice-ai",id:"version-3.17/data-sources/splice-ai",title:"Splice AI",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/splice-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/splice-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/splice-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/splice-ai.mdx",tags:[],version:"3.17",frontMatter:{title:"Splice 
AI"},sidebar:"version-3.17/docs",previous:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/revel"},next:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/topmed"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Filtering",id:"filtering",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:s},d="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. ",(0,r.kt)("em",{parentName:"p"},"Cell"),", ",(0,r.kt)("strong",{parentName:"p"},"176")," (3) (2019), pp. 
535-548 e24"))),(0,r.kt)("h2",{id:"vcf-file"},"VCF File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##fileformat=VCFv4.0\n##assembly=GRCh37/hg19\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n#CHROM POS ID REF ALT QUAL FILTER INFO\n10 92946 . C T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35\n10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1\n10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21\n10 92947 . A C . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34\n10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34\n10 92947 . A G . . 
SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32\n')),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the VCF file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AG")," - \u0394 score (acceptor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AL")," - \u0394 score (acceptor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DG")," - \u0394 score (donor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DL")," - \u0394 score (donor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AG")," - \u0394 position (acceptor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AL")," - \u0394 position (acceptor loss) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DG")," - \u0394 position (donor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DL")," - \u0394 position (donor loss) relative to the variant position")),(0,r.kt)("p",null,"The Splice AI team suggests the following interpretation for the scores:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Range"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Confidence"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Pathogenicity"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0 \u2264 x < 0.1"),(0,r.kt)("td",{parentName:"tr",align:"left"},"low"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely 
benign")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0.1 \u2264 x \u2264 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"medium"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely pathogenic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"x > 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"high"),(0,r.kt)("td",{parentName:"tr",align:"left"},"pathogenic")))),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"filtering"},"Filtering"),(0,r.kt)("p",null,"Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value. I.e. observing low splice scores in intergenic regions. Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed."),(0,r.kt)("p",null,"As a result, Nirvana filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. 
For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/5u6ThOblecrh"},"https://basespace.illumina.com/s/5u6ThOblecrh")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5373ba12.dcdb0e9b.js b/assets/js/5373ba12.dcdb0e9b.js deleted file mode 100644 index 39254533..00000000 --- a/assets/js/5373ba12.dcdb0e9b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9666,2164,2116],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>c});var a=n(67294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,s=e.parentName,m=i(e,["components","mdxType","originalType","parentName"]),u=p(n),g=l,c=u["".concat(s,".").concat(g)]||u[g]||d[g]||r;return 
n?a.createElement(c,o(o({ref:t},m),{},{components:n})):a.createElement(c,o({ref:t},m))}));function c(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,o=new Array(r);o[0]=g;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[u]="string"==typeof e?e:l,o[1]=i;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>i,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.14/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/gnomad-lof-json.md",tags:[],version:"3.14",frontMatter:{}},s=[],p={toc:s},m="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 
0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}u.isMDXComponent=!0},82436:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>i,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.14/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.14/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.14",frontMatter:{}},s=[],p={toc:s},m="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n 
"failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. 
Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}u.isMDXComponent=!0},81467:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>g,frontMatter:()=>i,metadata:()=>p,toc:()=>m});var a=n(87462),l=(n(67294),n(3905)),r=n(82436),o=n(14466);const i={title:"gnomAD"},s=void 0,p={unversionedId:"data-sources/gnomad",id:"version-3.14/data-sources/gnomad",title:"gnomAD",description:"Overview",source:"@site/versioned_docs/version-3.14/data-sources/gnomad.mdx",sourceDirName:"data-sources",slug:"/data-sources/gnomad",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/gnomad",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/gnomad.mdx",tags:[],version:"3.14",frontMatter:{title:"gnomAD"},sidebar:"version-3.14/docs",previous:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/dbsnp"},next:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mito-heteroplasmy"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF extraction",id:"vcf-extraction",children:[],level:3},{value:"Computation",id:"computation",children:[],level:3},{value:"Merging genomes and exomes",id:"merging-genomes-and-exomes",children:[],level:3},{value:"Filters",id:"filters",children:[],level:3},{value:"VCF download 
instructions",id:"vcf-download-instructions",children:[],level:3},{value:"JSON output",id:"json-output",children:[],level:3}],level:2},{value:"LoF Gene Metrics",id:"lof-gene-metrics",children:[{value:"Tab delimited file example",id:"tab-delimited-file-example",children:[],level:3},{value:"JSON key to TSV column mapping",id:"json-key-to-tsv-column-mapping",children:[],level:3},{value:"Gene symbol update",id:"gene-symbol-update",children:[],level:3},{value:"Conflict resolution",id:"conflict-resolution",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON output",id:"json-output-1",children:[],level:3}],level:2}],u={toc:m},d="wrapper";function g(e){let{components:t,...n}=e;return(0,l.kt)(d,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"The Genome Aggregation Database (",(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/"},"gnomAD"),") is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community."),(0,l.kt)("h2",{id:"small-variants"},"Small Variants"),(0,l.kt)("h3",{id:"vcf-extraction"},"VCF extraction"),(0,l.kt)("p",null,"We currently extract the following info fields from gnomAD genome and exome VCF files:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("p",null,"We also extract the following extra fields from gnomAD exome VCF 
file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("h3",{id:"computation"},"Computation"),(0,l.kt)("p",null,"Using these, we compute the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Coverage"),(0,l.kt)("li",{parentName:"ul"},"Allele count, Homozygous count, allele number and allele frequencies for:",(0,l.kt)("ul",{parentName:"li"},(0,l.kt)("li",{parentName:"ul"},"Global population"),(0,l.kt)("li",{parentName:"ul"},"African/African Americans"),(0,l.kt)("li",{parentName:"ul"},"Admixed Americans"),(0,l.kt)("li",{parentName:"ul"},"Ashkenazi Jews"),(0,l.kt)("li",{parentName:"ul"},"East Asians"),(0,l.kt)("li",{parentName:"ul"},"Finnish"),(0,l.kt)("li",{parentName:"ul"},"Non-Finnish Europeans"),(0,l.kt)("li",{parentName:"ul"},"South Asian"),(0,l.kt)("li",{parentName:"ul"},"Others (population not assigned)"),(0,l.kt)("li",{parentName:"ul"},"Male"),(0,l.kt)("li",{parentName:"ul"},"Female"),(0,l.kt)("li",{parentName:"ul"},"Controls")))),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 
2s-2.27-.86-2.5-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Coverage = DP / AN. Frequencies are computed using AC/AN for each population."),(0,l.kt)("li",{parentName:"ul"},"Please note that currently there is no genome sequencing data of south asian (SAS) population available in gnomAD."),(0,l.kt)("li",{parentName:"ul"},"Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.")))),(0,l.kt)("h3",{id:"merging-genomes-and-exomes"},"Merging genomes and exomes"),(0,l.kt)("p",null,"When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"For GRCh37, Nirvana currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output."),(0,l.kt)("li",{parentName:"ul"},"For GRCh38, Nirvana currently uses gnomAD version 3.0 which doesn't contain the exomes data. 
Therefore, only genomes data are presented in the output.")))),(0,l.kt)("h3",{id:"filters"},"Filters"),(0,l.kt)("p",null,"The following strategy will be used when there's a conflict in filter status:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"center"}),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes PASS")),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes Filtered")))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes PASS")),(0,l.kt)("td",{parentName:"tr",align:"center"},"PASS"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use exome data")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes Filtered")),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use genome data"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Filtered")))),(0,l.kt)("h3",{id:"vcf-download-instructions"},"VCF download instructions"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/downloads"},"https://gnomad.broadinstitute.org/downloads")),(0,l.kt)("h3",{id:"json-output"},"JSON output"),(0,l.kt)(r.default,{mdxType:"JSONV"}),(0,l.kt)("h2",{id:"lof-gene-metrics"},"LoF Gene Metrics"),(0,l.kt)("h3",{id:"tab-delimited-file-example"},"Tab delimited file example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_zmis_z lof_z oe_lof_upper_rank 
oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfep_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof exac_exp_lof exac_oe_lof brain_expression chromosome start_positionend_position\nMED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643\n")),(0,l.kt)("h3",{id:"json-key-to-tsv-column-mapping"},"JSON key to TSV column mapping"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"JSON key"),(0,l.kt)("th",{parentName:"tr",align:null},"TSV column"),(0,l.kt)("th",{parentName:"tr",align:null},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"pLI"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 
0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"syn_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"mis_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"oe_lof_upper"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))),(0,l.kt)("h3",{id:"gene-symbol-update"},"Gene symbol update"),(0,l.kt)("p",null,"The input file provides Ensembl gene ids for each entry. We observed that they were unique while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene Ids are more stable, and Nirvana transcript cache data contains Ensembl gene ids, we use these ids to extract the gene symbols from the transcript cache. 
For example, if ENSG0001 has gene symbol GENE1 in the input but Nirvana cache say ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry."),(0,l.kt)("h3",{id:"conflict-resolution"},"Conflict resolution"),(0,l.kt)("p",null,"gnomAD uses Ensembl GeneID as unique identifiers in the ",(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"source file")," but Nirvana uses HGNC gene symbols. Multiple Ensembl GeneIDs can map to the same HGNC symbol and therefore may result is conflict."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"MDGA2 ENST00000426342 306 4.0043e+02 7.6419e-01 2.1096e-05 4724 78 1.6525e+02 4.7202e-01 1923 125 1.3737e+02 9.0993e-01 7.1973e-06 1413 4 2.0926e-06 453 3.8316e+01 9.9922e-01 8.6490e-12 7.8128e-04 1.0440e-01 7.8600e-01 1.0560e+00 6.9500e-01 8.4000e-01 5.0000e-02 2.3900e-01 8.2988e-01 1.6769e+00 5.1372e+00 1529 0 0 7 2.8103e-05 4.0317e-06 124784 7 0 124791 2.8047e-05 9.8167e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5391e-05 1.6672e-04 3.2680e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5308e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000139915 2 2181 13 protein_coding 835332 9.9322e-01 3 2.7833e+01 1.0779e-01 NA 14 47308826 48144157\nMDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 
47311134 48143999\n")),(0,l.kt)("p",null,'In such cases, Nirvana chooses the entry with the smallest "LOEUF" value. The reason for choosing this value can be highlighted by the following table:'),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"right"},"LOEUF decile"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Haplo-insufficient"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Dominant"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Recessive"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Olfactory Genes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"0-10%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"104"),(0,l.kt)("td",{parentName:"tr",align:"right"},"140"),(0,l.kt)("td",{parentName:"tr",align:"right"},"36"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"10-20%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"47"),(0,l.kt)("td",{parentName:"tr",align:"right"},"128"),(0,l.kt)("td",{parentName:"tr",align:"right"},"72"),(0,l.kt)("td",{parentName:"tr",align:"right"},"1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"20-30%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"17"),(0,l.kt)("td",{parentName:"tr",align:"right"},"86"),(0,l.kt)("td",{parentName:"tr",align:"right"},"112"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"30-40%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"8"),(0,l.kt)("td",{parentName:"tr",align:"right"},"80"),(0,l.kt)("td",{parentName:"tr",align:"right"},"173"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"40-50%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"
7"),(0,l.kt)("td",{parentName:"tr",align:"right"},"65"),(0,l.kt)("td",{parentName:"tr",align:"right"},"206"),(0,l.kt)("td",{parentName:"tr",align:"right"},"8")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"50-60%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4"),(0,l.kt)("td",{parentName:"tr",align:"right"},"54"),(0,l.kt)("td",{parentName:"tr",align:"right"},"207"),(0,l.kt)("td",{parentName:"tr",align:"right"},"6")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"60-70%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"46"),(0,l.kt)("td",{parentName:"tr",align:"right"},"154"),(0,l.kt)("td",{parentName:"tr",align:"right"},"18")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"70-80%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"2"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49"),(0,l.kt)("td",{parentName:"tr",align:"right"},"120"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"80-90%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"34"),(0,l.kt)("td",{parentName:"tr",align:"right"},"58"),(0,l.kt)("td",{parentName:"tr",align:"right"},"96")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"90-100%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"26"),(0,l.kt)("td",{parentName:"tr",align:"right"},"40"),(0,l.kt)("td",{parentName:"tr",align:"right"},"174")))),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 
0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Table source: ",(0,l.kt)("a",{parentName:"li",href:"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf"},"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf")),(0,l.kt)("li",{parentName:"ul"},"This table indicates that lower LOEUF scores have more deleterious effect on genes."),(0,l.kt)("li",{parentName:"ul"},"Only 15 out of 19685 genes have conflicting entries.")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"List of genes with conflicting entries")),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'MDGA2:\n {"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}\n {"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}\nCRYBG3:\n {"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}\n {"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}\nCHTF8:\n {"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}\n {"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}\nSEPT1:\n {"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}\n {"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}\nARL14EPL:\n {"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}\n {"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}\nUGT2A1:\n 
{"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}\n {"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}\nLTB4R2:\n {"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}\n {"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}\nCDRT1:\n {"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}\n {"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}\nMUC3A:\n {"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}\n {"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}\nCOG8:\n {"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}\n {"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}\nAC006486.1:\n {"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}\n {"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}\nAL645922.1:\n {"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}\n {"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}\nNBPF20:\n {"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}\n {"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}\nPRAMEF11:\n {"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}\n {"synZ":-3.33e0,"misZ":-2.59e0}\nFAM231D:\n {"synZ":-1.98e0,"misZ":-1.44e0}\n {"synZ":1.07e0,"misZ":3.13e-1}\n')),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Conflict resolution")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Pick the entry with the lowest LOEUF 
score"),(0,l.kt)("li",{parentName:"ul"},"If the same, pick the lowest pLI"),(0,l.kt)("li",{parentName:"ul"},"Otherwise pick the entry with the max absolute value of synZ + misZ")),(0,l.kt)("h3",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz")),(0,l.kt)("h3",{id:"json-output-1"},"JSON output"),(0,l.kt)(o.default,{mdxType:"JSONG"}))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/dd37cdf2.340e8d36.js b/assets/js/539175fb.630bd587.js similarity index 56% rename from assets/js/dd37cdf2.340e8d36.js rename to assets/js/539175fb.630bd587.js index 5af1a4ba..6dd656c0 100644 --- a/assets/js/dd37cdf2.340e8d36.js +++ b/assets/js/539175fb.630bd587.js @@ -1 +1 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[880],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return 
r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=a,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||o;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=d;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:a,l[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,c={unversionedId:"data-sources/gerp-json",id:"version-3.18/data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gerp-json.md",tags:[],version:"3.18",frontMatter:{}},i=[],p={toc:i},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 1.27\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to 
+\u221e")))))}u.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5702],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>f});var r=n(7294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},u=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},s="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,u=c(e,["components","mdxType","originalType","parentName"]),s=p(n),d=a,f=s["".concat(i,".").concat(d)]||s[d]||m[d]||o;return n?r.createElement(f,l(l({ref:t},u),{},{components:n})):r.createElement(f,l({ref:t},u))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=d;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[s]="string"==typeof e?e:a,l[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>s,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var r=n(7462),a=(n(7294),n(3905));const o={},l=void 0,c={unversionedId:"data-sources/gerp-json",id:"data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gerp-json.md",tags:[],version:"current",frontMatter:{}},i=[],p={toc:i},u="wrapper";function s(e){let{components:t,...n}=e;return(0,a.kt)(u,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 1.27\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to +\u221e")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/539175fb.65acb942.js b/assets/js/539175fb.65acb942.js deleted file mode 100644 index 796d8f15..00000000 --- a/assets/js/539175fb.65acb942.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5702],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},u=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},s="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,u=c(e,["components","mdxType","originalType","parentName"]),s=p(n),d=a,f=s["".concat(i,".").concat(d)]||s[d]||m[d]||o;return n?r.createElement(f,l(l({ref:t},u),{},{components:n})):r.createElement(f,l({ref:t},u))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=d;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[s]="string"==typeof e?e:a,l[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>s,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,c={unversionedId:"data-sources/gerp-json",id:"data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gerp-json.md",tags:[],version:"current",frontMatter:{}},i=[],p={toc:i},u="wrapper";function s(e){let{components:t,...n}=e;return(0,a.kt)(u,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 
1.27\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to +\u221e")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/54b21ebd.e09c6c35.js b/assets/js/54b21ebd.e09c6c35.js deleted file mode 100644 index a86728c9..00000000 --- a/assets/js/54b21ebd.e09c6c35.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1966],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function a(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var 
n=e.components,o=e.mdxType,a=e.originalType,i=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=o,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||a;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var a=n.length,l=new Array(a);l[0]=d;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:o,l[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>a,metadata:()=>c,toc:()=>i});var r=n(87462),o=(n(67294),n(3905));const a={},l=void 0,c={unversionedId:"data-sources/phylop-json",id:"version-3.16/data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/phylop-json.md",tags:[],version:"3.16",frontMatter:{}},i=[],p={toc:i},s="wrapper";function u(e){let{components:t,...n}=e;return(0,o.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] 
\n')),(0,o.kt)("table",null,(0,o.kt)("thead",{parentName:"table"},(0,o.kt)("tr",{parentName:"thead"},(0,o.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,o.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,o.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,o.kt)("tbody",{parentName:"table"},(0,o.kt)("tr",{parentName:"tbody"},(0,o.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,o.kt)("td",{parentName:"tr",align:"center"},"float"),(0,o.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/556a5544.7a06e193.js b/assets/js/556a5544.7a06e193.js deleted file mode 100644 index d62e1c09..00000000 --- a/assets/js/556a5544.7a06e193.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7394,2616],{3905:(e,n,t)=>{t.d(n,{Zo:()=>c,kt:()=>g});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function l(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var o=a.createContext({}),p=function(e){var n=a.useContext(o),t=n;return e&&(t="function"==typeof e?e(n):l(l({},n),e)),t},c=function(e){var n=p(e.components);return a.createElement(o.Provider,{value:n},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},u=a.forwardRef((function(e,n){var 
t=e.components,i=e.mdxType,r=e.originalType,o=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),m=p(t),u=i,g=m["".concat(o,".").concat(u)]||m[u]||d[u]||r;return t?a.createElement(g,l(l({ref:n},c),{},{components:t})):a.createElement(g,l({ref:n},c))}));function g(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,l=new Array(r);l[0]=u;var s={};for(var o in n)hasOwnProperty.call(n,o)&&(s[o]=n[o]);s.originalType=e,s[m]="string"==typeof e?e:i,l[1]=s;for(var p=2;p{t.r(n),t.d(n,{contentTitle:()=>l,default:()=>m,frontMatter:()=>r,metadata:()=>s,toc:()=>o});var a=t(87462),i=(t(67294),t(3905));const r={},l=void 0,s={unversionedId:"data-sources/clinvar-json",id:"version-3.21/data-sources/clinvar-json",title:"clinvar-json",description:"small variants:",source:"@site/versioned_docs/version-3.21/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clinvar-json.md",tags:[],version:"3.21",frontMatter:{}},o=[],p={toc:o},c="wrapper";function m(e){let{components:n,...t}=e;return(0,i.kt)(c,(0,a.Z)({},p,t,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"small variants:")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n 
"120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"large variants:")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n "pathogenic"\n ], \n "lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n "significance":[\n "pathogenic"\n ],\n "lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"variant 
type")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no assertion provided"),(0,i.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,i.kt)("li",{parentName:"ul"},"practice guideline"),(0,i.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single 
variant")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"unknown"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"germline"),(0,i.kt)("li",{parentName:"ul"},"somatic"),(0,i.kt)("li",{parentName:"ul"},"inherited"),(0,i.kt)("li",{parentName:"ul"},"paternal"),(0,i.kt)("li",{parentName:"ul"},"maternal"),(0,i.kt)("li",{parentName:"ul"},"de-novo"),(0,i.kt)("li",{parentName:"ul"},"biparental"),(0,i.kt)("li",{parentName:"ul"},"uniparental"),(0,i.kt)("li",{parentName:"ul"},"not-tested"),(0,i.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"uncertain significance"),(0,i.kt)("li",{parentName:"ul"},"not provided"),(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"drug response"),(0,i.kt)("li",{parentName:"ul"},"histocompatibility"),(0,i.kt)("li",{parentName:"ul"},"association"),(0,i.kt)("li",{parentName:"ul"},"risk factor"),(0,i.kt)("li",{parentName:"ul"},"protective"),(0,i.kt)("li",{parentName:"ul"},"affects"),(0,i.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,i.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}m.isMDXComponent=!0},15175:(e,n,t)=>{t.r(n),t.d(n,{contentTitle:()=>s,default:()=>d,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=t(87462),i=(t(67294),t(3905)),r=t(95697);const l={title:"ClinVar"},s=void 
0,o={unversionedId:"data-sources/clinvar",id:"version-3.21/data-sources/clinvar",title:"ClinVar",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/clinvar.mdx",sourceDirName:"data-sources",slug:"/data-sources/clinvar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clinvar",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clinvar.mdx",tags:[],version:"3.21",frontMatter:{title:"ClinVar"},sidebar:"docs",previous:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen"},next:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cosmic"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"RCV File",id:"rcv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Parsing Significance",id:"parsing-significance",children:[],level:4}],level:3}],level:2},{value:"VCV File",id:"vcv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[{value:"Source data files",id:"source-data-files",children:[],level:3}],level:2}],c={toc:p},m="wrapper";function d(e){let{components:n,...l}=e;return(0,i.kt)(m,(0,a.Z)({},c,l,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. 
ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", ",(0,i.kt)("strong",{parentName:"p"},"46"),", Issue D1, 4 January 2018, Pages D1062\u2013D1067, ",(0,i.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/nar/gkx1153"},"https://doi.org/10.1093/nar/gkx1153")))),(0,i.kt)("h2",{id:"rcv-file"},"RCV File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{target:"_blank",href:t(13386).Z},"a full RCV entry"),"."),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON 
output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ID")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3}","{3}":!0},'\n \n \n\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"LastUpdatedDate")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},'\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{5}","{5}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ReviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Phenotypes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2-8}","{2-8}":!0},'\n \n \n \n Joubert syndrome 9\n \n \n \n\n')),(0,i.kt)("p",null,'We only use the field with Type="Preferred". 
Multiple phenotypes may be reported'),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Location, Variant Type and Variant Id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3-12}","{3-12}":!0},'\n\n \n \n \n \n \n \n \n\n')),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"The variant position is extracted from the fields for their respective assemblies."),(0,i.kt)("li",{parentName:"ul"},"Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant."),(0,i.kt)("li",{parentName:"ul"},'For older records, since "start\' and "stop" fields are not always available, we use the "display_start" and "display_end" fields.'),(0,i.kt)("li",{parentName:"ul"},"If a required allele is not available, we extract it from the reference sequence."),(0,i.kt)("li",{parentName:"ul"},"Only variants having a dbSNP id are extracted."),(0,i.kt)("li",{parentName:"ul"},"Note that a ClinVar accession may have multiple variants associated with it (possible in different locations)"),(0,i.kt)("li",{parentName:"ul"},"VariantId is extracted from the MeasureSet attributes."),(0,i.kt)("li",{parentName:"ul"},"VariantType is extracted from the Measure attributes.",(0,i.kt)("div",{parentName:"li",className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"unsupported variant 
types")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"We currently don't support the following variant types:"),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"Microsatellite"),(0,i.kt)("li",{parentName:"ul"},"protein only"),(0,i.kt)("li",{parentName:"ul"},"fusion"),(0,i.kt)("li",{parentName:"ul"},"Complex"),(0,i.kt)("li",{parentName:"ul"},"Variation"),(0,i.kt)("li",{parentName:"ul"},"Translocation ")))))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"MedGen, OMIM, Orphanet IDs")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4-7}","{4-7}":!0},'\n \n \n \n \n \n \n \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"AlleleOrigins")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},"\n germline\n\n")),(0,i.kt)("p",null,"We only extract all Allele Origins from Submissions (SCV) entries."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"PubMedIds")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4,10,16,21}","{4,10,16,21}":!0},'\n \n \n 12114475\n \n \n \n LMM Criteria\n \n 24033266\n \n \n \n \n \n 9113933\n \n \n \n \n 23757202\n \n\n')),(0,i.kt)("p",null,"We only extract all Pubmed Ids from Submissions (SCV) entries."),(0,i.kt)("h4",{id:"parsing-significance"},"Parsing Significance"),(0,i.kt)("p",null,"Extracting significance(s) may involve parsing multiple fields. 
Take the following snippets into consideration."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,8,13-14}","{3,8,13-14}":!0},'\n no assertion criteria provided\n Pathogenic\n\n\n\n criteria provided, multiple submitters, no conflicts\n Pathogenic/Likely pathogenic\n\n\n\n no assertion criteria provided\n Conflicting interpretations of pathogenicity\n Pathogenic(1);Uncertain significance(1)\n\n')),(0,i.kt)("p",null,"Given the evidence, we converted the significance field into an array of strings which may be parsed out of the ",(0,i.kt)("inlineCode",{parentName:"p"},"Descriptions")," or ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," fields."),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Varying Delimiters")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The delimiters in each field may vary. Currently, the delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Description")," are ",(0,i.kt)("inlineCode",{parentName:"p"},",")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),". 
The delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," are ",(0,i.kt)("inlineCode",{parentName:"p"},";")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),"."))),(0,i.kt)("h2",{id:"vcv-file"},"VCV File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n\n\n current\n Homo sapiens\n \n \n \n \n \n 1p36.31\n \n \n \n 601142\n \n \n \n 1p36.31\n \n \n \n 607215\n \n \n GRCh37/hg19 1p36.31(chr1:6051187-6158763)\n copy number gain\n \n 1p36.31\n \n \n \n no interpretation for the single variant\n \n \n \n \n \n \n no interpretation for the single variant\n \n \n no interpretation for the single variant\n \n \n \n \n \n \n \n \n \n\n\n')),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{7}","{7}":!0},'\n \n \n \n \n \n no interpretation for the single variant\n \n \n \n \n \n\n')),(0,i.kt)("p",null,"May have multiple significances listed."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},"\n \n \n no interpretation for the single variant\n \n \n\n")),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"The XML file contains ~1k more entries (out of 162K) than the VCF file"),(0,i.kt)("li",{parentName:"ul"},"The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF"),(0,i.kt)("li",{parentName:"ul"},'The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H",\netc.) 
as their alternate allele')))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz"},"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz")),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz"},"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz")),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The ClinVar ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," and ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," for Nirvana can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"clinvar")," subcommand."),(0,i.kt)("h3",{id:"source-data-files"},"Source data files"),(0,i.kt)("p",null,"Two input ",(0,i.kt)("inlineCode",{parentName:"p"},".xml")," files and a ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file are required in order to build the ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," and ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," file. 
You should have the following files:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"ClinVarFullRelease_00-latest.xml.gz ClinVarVariationRelease_00-latest.xml.gz\nClinVarFullRelease_00-latest.xml.gz.version\n")),(0,i.kt)("p",null,"The version file is a text file with the follwoing format."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinVar\nVERSION=20220505\nDATE=2022-05-05\nDESCRIPTION=A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence\n")),(0,i.kt)("p",null,"The help menu for the utility is as follows:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll clinvar\n---------------------------------------------------------------------------\nSAUtils (c) 2022 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.18.1\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll clinvar [options]\nCreates a supplementary database with ClinVar annotations\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --rcv, -i ClinVar Full release XML file\n --vcv, -c ClinVar Variation release XML file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll clinvar\n")),(0,i.kt)("p",null,"Here is a sample execution:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet ~/development/Nirvana/bin/Debug/net6.0/SAUtils.dll clinvar \\\\\n--ref ~/development/References/7/Homo_sapiens.GRCh38.Nirvana.dat --rcv ClinVarFullRelease_00-latest.xml.gz \\\\\n--vcv ClinVarVariationRelease_00-latest.xml.gz --out ~/development/SupplementaryDatabase/63/GRCh38\n---------------------------------------------------------------------------\nSAUtils (c) 2022 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 
3.18.1\n---------------------------------------------------------------------------\n\nFound 1535677 VCV records\nUnknown vcv id:225946 found in RCV000211201.2\nUnknown vcv id:225946 found in RCV000211253.2\nUnknown vcv id:225946 found in RCV000211375.2\nUnknown vcv id:976117 found in RCV001253316.1\nUnknown vcv id:1321016 found in RCV001776995.2\n3 unknown VCVs found in RCVs.\n225946,976117,1321016\n0 unknown VCVs found in RCVs.\nChromosome 1 completed in 00:00:15.1\nChromosome 2 completed in 00:00:20.0\nChromosome 3 completed in 00:00:09.7\nChromosome 4 completed in 00:00:05.9\nChromosome 5 completed in 00:00:09.8\nChromosome 6 completed in 00:00:08.3\nChromosome 7 completed in 00:00:08.7\nChromosome 8 completed in 00:00:06.2\nChromosome 9 completed in 00:00:08.6\nChromosome 10 completed in 00:00:07.0\nChromosome 11 completed in 00:00:11.7\nChromosome 12 completed in 00:00:08.0\nChromosome 13 completed in 00:00:06.3\nChromosome 14 completed in 00:00:06.0\nChromosome 15 completed in 00:00:06.6\nChromosome 16 completed in 00:00:10.8\nChromosome 17 completed in 00:00:13.8\nChromosome 18 completed in 00:00:02.9\nChromosome 19 completed in 00:00:08.7\nChromosome 20 completed in 00:00:03.6\nChromosome 21 completed in 00:00:02.4\nChromosome 22 completed in 00:00:03.6\nChromosome MT completed in 00:00:00.2\nChromosome X completed in 00:00:07.5\nChromosome Y completed in 00:00:00.0\nMaximum bp shifted for any variant:2\nWriting 37097 intervals to database...\n\nTime: 00:13:26.9\n\n")))}d.isMDXComponent=!0},13386:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/files/clinvar-rcv-example-4e0a2f2ac6c70acd0ce41410690b683b.xml"}}]); \ No newline at end of file diff --git a/assets/js/57cffed1.3a2dd7da.js b/assets/js/57cffed1.3a2dd7da.js deleted file mode 100644 index b593efa0..00000000 --- a/assets/js/57cffed1.3a2dd7da.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6192],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),u=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=u(e.components);return r.createElement(i.Provider,{value:t},e.children)},s="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),s=u(n),m=a,f=s["".concat(i,".").concat(m)]||s[m]||d[m]||o;return n?r.createElement(f,l(l({ref:t},p),{},{components:n})):r.createElement(f,l({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=m;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[s]="string"==typeof e?e:a,l[1]=c;for(var u=2;u{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>s,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,c={unversionedId:"data-sources/dann-json",id:"data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dann-json.md",tags:[],version:"current",frontMatter:{}},i=[],u={toc:i},p="wrapper";function s(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 0.27\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1.0")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/57cffed1.8267faf7.js b/assets/js/57cffed1.8267faf7.js new file mode 100644 index 00000000..929a62f3 --- /dev/null +++ b/assets/js/57cffed1.8267faf7.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6192],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(7294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),u=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=u(e.components);return r.createElement(i.Provider,{value:t},e.children)},s="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),s=u(n),m=a,f=s["".concat(i,".").concat(m)]||s[m]||d[m]||o;return n?r.createElement(f,l(l({ref:t},p),{},{components:n})):r.createElement(f,l({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=m;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[s]="string"==typeof e?e:a,l[1]=c;for(var u=2;u{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>s,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var r=n(7462),a=(n(7294),n(3905));const o={},l=void 0,c={unversionedId:"data-sources/dann-json",id:"data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dann-json.md",tags:[],version:"current",frontMatter:{}},i=[],u={toc:i},p="wrapper";function s(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 
0.27\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1.0")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/59016f14.3dc0656a.js b/assets/js/59016f14.3dc0656a.js deleted file mode 100644 index 439a3e5b..00000000 --- a/assets/js/59016f14.3dc0656a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8244],{3905:(t,n,e)=>{e.d(n,{Zo:()=>m,kt:()=>k});var a=e(67294);function l(t,n,e){return n in t?Object.defineProperty(t,n,{value:e,enumerable:!0,configurable:!0,writable:!0}):t[n]=e,t}function r(t,n){var e=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable}))),e.push.apply(e,a)}return e}function o(t){for(var n=1;n=0||(l[e]=t[e]);return l}(t,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,e)&&(l[e]=t[e])}return l}var p=a.createContext({}),u=function(t){var n=a.useContext(p),e=n;return t&&(e="function"==typeof t?t(n):o(o({},n),t)),e},m=function(t){var n=u(t.components);return a.createElement(p.Provider,{value:n},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var n=t.children;return a.createElement(a.Fragment,{},n)}},N=a.forwardRef((function(t,n){var 
e=t.components,l=t.mdxType,r=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(e),N=l,k=d["".concat(p,".").concat(N)]||d[N]||g[N]||r;return e?a.createElement(k,o(o({ref:n},m),{},{components:e})):a.createElement(k,o({ref:n},m))}));function k(t,n){var e=arguments,l=n&&n.mdxType;if("string"==typeof t||l){var r=e.length,o=new Array(r);o[0]=N;var i={};for(var p in n)hasOwnProperty.call(n,p)&&(i[p]=n[p]);i.originalType=t,i[d]="string"==typeof t?t:l,o[1]=i;for(var u=2;u{e.r(n),e.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>i,toc:()=>p});var a=e(87462),l=(e(67294),e(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.17/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],u={toc:p},m="wrapper";function d(t){let{components:n,...e}=t;return(0,l.kt)(m,(0,a.Z)({},u,e,{components:n,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n 
"amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/59a08a3d.912ae4f0.js b/assets/js/59a08a3d.912ae4f0.js deleted file mode 100644 index e147eb94..00000000 --- a/assets/js/59a08a3d.912ae4f0.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6865,3476],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=d(n),u=r,v=p["".concat(s,".").concat(u)]||p[u]||m[u]||o;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/topmed-json",id:"version-3.17/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/topmed-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],d={toc:s},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}p.isMDXComponent=!0},14122:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(63745);const i={title:"TOPMed"},l=void 
0,s={unversionedId:"data-sources/topmed",id:"version-3.17/data-sources/topmed",title:"TOPMed",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/topmed.mdx",sourceDirName:"data-sources",slug:"/data-sources/topmed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/topmed",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/topmed.mdx",tags:[],version:"3.17",frontMatter:{title:"TOPMed"},sidebar:"version-3.17/docs",previous:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/splice-ai"},next:{title:"Nirvana JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/file-formats/nirvana-json-file-format"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF extraction",id:"vcf-extraction",children:[],level:2},{value:"GRCh37 liftover",id:"grch37-liftover",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON output",id:"json-output",children:[],level:2}],c={toc:d},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"https://www.nhlbi.nih.gov/science/trans-omics-precision-medicine-topmed-program"},"Trans-Omics for Precision Medicine")," (TOPMed) program, sponsored by the National Institutes of Health (NIH) National Heart, Lung and Blood Institute (NHLBI), is part of a broader Precision Medicine Initiative, which aims to provide disease treatments tailored to an individual\u2019s unique genes and environment. 
TOPMed contributes to this Initiative through the integration of whole-genome sequencing (WGS) and other omics (e.g., metabolic profiles, epigenomics, protein and RNA expression patterns) data with molecular, behavioral, imaging, environmental, and clinical data."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Kowalski, M.H., Qian, H., Hou, Z., Rosen, J.D., Tapia, A.L., Shan, Y., Jain, D., Argos, M., Arnett, D.K., Avery, C. and Barnes, K.C., 2019. Use of> 100,000 NHLBI Trans-Omics for Precision Medicine (TOPMed) Consortium whole genome sequences improves imputation quality and detection of rare variant associations in admixed African and Hispanic/Latino populations. 
",(0,r.kt)("em",{parentName:"p"},"PLoS genetics"),", ",(0,r.kt)("strong",{parentName:"p"},"15(12)"),", p.e1008500."))),(0,r.kt)("h2",{id:"vcf-extraction"},"VCF extraction"),(0,r.kt)("p",null,"We currently extract the following fields from TOPMed VCF file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,r.kt)("p",null,"Example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 10132 TOPMed_freeze_5?chr1:10,132 T C 255 SVM VRT=1;NS=62784;AN=125568;AC=32;AF=0.000254842;Het=32;Hom=0 NA:FRQ 125568:0.000254842\n")),(0,r.kt)("h2",{id:"grch37-liftover"},"GRCh37 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh37 on TOPMed website. We performed a liftover from GRCh38 to GRCh37 using dbSNP ids."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://bravo.sph.umich.edu/freeze5/hg38/download"},"https://bravo.sph.umich.edu/freeze5/hg38/download")),(0,r.kt)("h2",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5a0fd99f.c0b5a14e.js b/assets/js/5a0fd99f.c0b5a14e.js deleted file mode 100644 index a2ca9f58..00000000 --- a/assets/js/5a0fd99f.c0b5a14e.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9151],{3905:(t,n,e)=>{e.d(n,{Zo:()=>m,kt:()=>s});var a=e(67294);function r(t,n,e){return n in t?Object.defineProperty(t,n,{value:e,enumerable:!0,configurable:!0,writable:!0}):t[n]=e,t}function l(t,n){var e=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable}))),e.push.apply(e,a)}return e}function o(t){for(var n=1;n=0||(r[e]=t[e]);return r}(t,n);if(Object.getOwnPropertySymbols){var 
l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,e)&&(r[e]=t[e])}return r}var p=a.createContext({}),u=function(t){var n=a.useContext(p),e=n;return t&&(e="function"==typeof t?t(n):o(o({},n),t)),e},m=function(t){var n=u(t.components);return a.createElement(p.Provider,{value:n},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var n=t.children;return a.createElement(a.Fragment,{},n)}},N=a.forwardRef((function(t,n){var e=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(e),N=r,s=d["".concat(p,".").concat(N)]||d[N]||g[N]||l;return e?a.createElement(s,o(o({ref:n},m),{},{components:e})):a.createElement(s,o({ref:n},m))}));function s(t,n){var e=arguments,r=n&&n.mdxType;if("string"==typeof t||r){var l=e.length,o=new Array(l);o[0]=N;var i={};for(var p in n)hasOwnProperty.call(n,p)&&(i[p]=n[p]);i.originalType=t,i[d]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{e.r(n),e.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=e(87462),r=(e(67294),e(3905));const l={},o=void 0,i={unversionedId:"data-sources/gnomad-genomes-small-variants-json",id:"version-3.2.5/data-sources/gnomad-genomes-small-variants-json",title:"gnomad-genomes-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/gnomad-genomes-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-genomes-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/gnomad-genomes-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/gnomad-genomes-small-variants-json.md",tags:[],version:"3.2.5",frontMatter:{}},p=[],u={toc:p},m="wrapper";function 
d(t){let{components:n,...e}=t;return(0,r.kt)(m,(0,a.Z)({},u,e,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coverage"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. 
Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5b433544.c202bb29.js b/assets/js/5b433544.c202bb29.js deleted file mode 100644 index 0a19419a..00000000 --- a/assets/js/5b433544.c202bb29.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7199],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=a,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||o;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new 
Array(o);l[0]=d;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:a,l[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,c={unversionedId:"data-sources/gerp-json",id:"version-3.21/data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gerp-json.md",tags:[],version:"3.21",frontMatter:{}},i=[],p={toc:i},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 1.27\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to +\u221e")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5b6d49f1.cfbef21c.js b/assets/js/5b6d49f1.cfbef21c.js deleted file mode 100644 index 63d7ad46..00000000 --- a/assets/js/5b6d49f1.cfbef21c.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6569,6729,6698],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>N});var 
a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},u=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},c=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,p=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),m=s(n),c=r,N=m["".concat(p,".").concat(c)]||m[c]||d[c]||l;return n?a.createElement(N,o(o({ref:t},u),{},{components:n})):a.createElement(N,o({ref:t},u))}));function N(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[m]="string"==typeof e?e:r,o[1]=i;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.18/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.18/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. Non-zero integer.")))))}m.isMDXComponent=!0},17656:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.18/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}m.isMDXComponent=!0},52627:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>p,default:()=>c,frontMatter:()=>i,metadata:()=>s,toc:()=>u});var a=n(87462),r=(n(67294),n(3905)),l=n(24029),o=n(17656);const i={title:"1000 Genomes"},p=void 0,s={unversionedId:"data-sources/1000Genomes",id:"version-3.18/data-sources/1000Genomes",title:"1000 Genomes",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/1000Genomes.mdx",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/1000Genomes",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/1000Genomes.mdx",tags:[],version:"3.18",frontMatter:{title:"1000 Genomes"},sidebar:"docs",previous:{title:"Annotating COVID-19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/covid19"},next:{title:"Amino Acid Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/amino-acid-conservation"}},u=[{value:"Overview",id:"overview",children:[],level:2},{value:"Populations",id:"populations",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing",children:[{value:"Conflict Resolution",id:"conflict-resolution",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Structural Variants",id:"structural-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing-1",children:[],level:3},{value:"Converting VCF svTypes to SO sequence 
alterations",id:"converting-vcf-svtypes-to-so-sequence-alterations",children:[{value:"Exceptions",id:"exceptions",children:[],level:4}],level:3}],level:2},{value:"JSON Output",id:"json-output-1",children:[],level:2}],m={toc:u},d="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. ",(0,r.kt)("em",{parentName:"p"},"Nature 526"),", 75\u201381 (2015). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/nature15394"},"https://doi.org/10.1038/nature15394")))),(0,r.kt)("h2",{id:"populations"},"Populations"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"The super population membership can be found here: (",(0,r.kt)("a",{parentName:"li",href:"http://www.1000genomes.org/category/population/"},"http://www.1000genomes.org/category/population/"),")"),(0,r.kt)("li",{parentName:"ul"},"We want to capture the allele frequencies for all 26 populations as well as the 5 super populations and the total population.")),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing"},"VCF File Parsing"),(0,r.kt)("p",null,"The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. 
Our team converted the original data to VCF entries with allele counts and allele numbers like the following."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633\n")),(0,r.kt)("p",null,"The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored)."),(0,r.kt)("p",null,"We parse the VCF file and extract the following fields from INFO:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"AA"),(0,r.kt)("li",{parentName:"ul"},"AC"),(0,r.kt)("li",{parentName:"ul"},"AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AN"),(0,r.kt)("li",{parentName:"ul"},"AMR_AN"),(0,r.kt)("li",{parentName:"ul"},"AFR_AN"),(0,r.kt)("li",{parentName:"ul"},"EUR_AN"),(0,r.kt)("li",{parentName:"ul"},"SAS_AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AC"),(0,r.kt)("li",{parentName:"ul"},"AMR_AC"),(0,r.kt)("li",{parentName:"ul"},"AFR_AC"),(0,r.kt)("li",{parentName:"ul"},"EUR_AC"),(0,r.kt)("li",{parentName:"ul"},"SAS_AC")),(0,r.kt)("h4",{id:"conflict-resolution"},"Conflict Resolution"),(0,r.kt)("p",null,"We have observed conflicting allele frequency information in the source. Take the following example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;\n1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;\n")),(0,r.kt)("p",null,"That is, the variant 1-20505705-C-CTG has conflicting entries. 
To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Chromosome"),(0,r.kt)("th",{parentName:"tr",align:"left"},"#"," of alleles"),(0,r.kt)("th",{parentName:"tr",align:"center"},"#"," of conflicting alleles"),(0,r.kt)("th",{parentName:"tr",align:"left"},"percentage"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"chrX"),(0,r.kt)("td",{parentName:"tr",align:"left"},"834800"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2733"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.33%")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"Total"),(0,r.kt)("td",{parentName:"tr",align:"left"},"21413098"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2743"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.013%")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Currently"),", we removed the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep allele frequencies of all other alleles in the VCF line."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Potential Alternate Solutions")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Remove all alleles that are contained in the vcf lines which have conflicting allele. 
(Recommended by 1000 genome group Holly Zheng-Bradley, 7/29/2015)"),(0,r.kt)("li",{parentName:"ul"},"Recalculate the allele frequency for the conflicting allele."),(0,r.kt)("li",{parentName:"ul"},"Pick the allele frequency that has the highest data support.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/"},"GRCh37"),"\n",(0,r.kt)("a",{parentName:"p",href:"http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/"},"GRCh38")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSONSNV"}),(0,r.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing-1"},"VCF File Parsing"),(0,r.kt)("p",null,"The VCF files contain entries like the following:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A ,,, 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4\n")),(0,r.kt)("p",null,"Please note that, CNVs are allele-specific. 
For example, HG00096 is effectively copy number 4, which would be a net gain on chr22."),(0,r.kt)("p",null,"1000 Genomes contains 5 types of structural variants:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CNV"),(0,r.kt)("li",{parentName:"ul"},"DEL"),(0,r.kt)("li",{parentName:"ul"},"DUP"),(0,r.kt)("li",{parentName:"ul"},"INS"),(0,r.kt)("li",{parentName:"ul"},"INV")),(0,r.kt)("p",null,"Since data of 1000 genomes is provided in VCF format, we assume that the coordinates follow the vcf format, i.e., there is a padding base for symbolic alleles. So all the interval can be interpreted as ","[BEGIN+1, END]",".\nSimilarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Insertion issues")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"END = BEGIN for 6/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+2 for 93/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+3 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+4 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END \u2013 BEGIN range from 5 to 1156 for others.")),(0,r.kt)("h3",{id:"converting-vcf-svtypes-to-so-sequence-alterations"},"Converting VCF svTypes to SO sequence alterations"),(0,r.kt)("p",null,"The svType will be captured in our JSON file under the ",(0,r.kt)("a",{parentName:"p",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"sequenceAlteration")," key. 
Here's the translation we'll use according to svType in 1000 Genomes."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"svType"),(0,r.kt)("th",{parentName:"tr",align:null},"Alternative Alleles contain "),(0,r.kt)("th",{parentName:"tr",align:null},"sequenceAlteration"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"ALU"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DUP"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"CNV"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain (observed_gains >0 and observed_losses =0) ",(0,r.kt)("br",null),"copy_number_loss\xa0(observed_gains = 0 and observed_losses > 0) ",(0,r.kt)("br",null),"copy_number_variation 
(otherwise)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DEL"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_loss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"LINE1"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"SVA"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INV"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"inversion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INS"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"insertion")))),(0,r.kt)("h4",{id:"exceptions"},"Exceptions"),(0,r.kt)("p",null,(0,r.kt)("em",{parentName:"p"},"We discard structural variants without END")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n21 9495848 esv3646347 A 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0\n")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"CNVs in chrY")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"No other types of structural variants exist in chrY"),(0,r.kt)("li",{parentName:"ul"},'Since copy number is provided in genotype field, we directly parse the copy number from "CN" field.'),(0,r.kt)("li",{parentName:"ul"},"For most CNVs in chrY, the reference copy 
number is 1, but the refence number for CNVs in segmental duplication sites is 2 ("," in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG00105 HG00107 HG00108\nY 2888555 CNV_Y_2888555_3014661 T 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394\nY 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C , 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99\n")),(0,r.kt)("h2",{id:"json-output-1"},"JSON Output"),(0,r.kt)(o.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5b7bb28d.48f53962.js b/assets/js/5b7bb28d.48f53962.js deleted file mode 100644 index e9a13e67..00000000 --- a/assets/js/5b7bb28d.48f53962.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2724,216],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>h});var a=n(67294);function i(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},p=function(e){var t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=m(n),u=i,h=d["".concat(s,".").concat(u)]||d[u]||c[u]||o;return n?a.createElement(h,r(r({ref:t},p),{},{components:n})):a.createElement(h,r({ref:t},p))}));function h(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var o=n.length,r=new Array(o);r[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:i,r[1]=l;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>d,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const o={},r=void 0,l={unversionedId:"data-sources/omim-json",id:"data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/omim-json.md",tags:[],version:"current",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],m={toc:s},p="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,i.kt)("h4",{id:"phenotype"},"Phenotype"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,i.kt)("h4",{id:"mapping"},"Mapping"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,i.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,i.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,i.kt)("h4",{id:"inheritance"},"Inheritance"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,i.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,i.kt)("h4",{id:"comments"},"Comments"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,i.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,i.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}d.isMDXComponent=!0},71927:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>c,frontMatter:()=>r,metadata:()=>s,toc:()=>m});var a=n(87462),i=(n(67294),n(3905)),o=n(88010);const r={title:"OMIM"},l=void 
0,s={unversionedId:"data-sources/omim",id:"data-sources/omim",title:"OMIM",description:"Overview",source:"@site/docs/data-sources/omim.mdx",sourceDirName:"data-sources",slug:"/data-sources/omim",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/omim.mdx",tags:[],version:"current",frontMatter:{title:"OMIM"},sidebar:"docs",previous:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap"},next:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Parse OMIM data",id:"parse-omim-data",children:[{value:"mim2gene.txt",id:"mim2genetxt",children:[],level:3},{value:"OMIM API",id:"omim-api",children:[{value:"Mapping key to content",id:"mapping-key-to-content",children:[],level:4},{value:"Phenotype character to comment",id:"phenotype-character-to-comment",children:[],level:4}],level:3},{value:"Remove links in OMIM descriptions",id:"remove-links-in-omim-descriptions",children:[],level:3}],level:2},{value:"JSON output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[],level:2}],p={toc:m},d="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 
16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publications")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/30445645/"},"30445645"),"."),(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM\xae), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/25428349/"},"25428349"),"."))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Professional data source")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"This is a Professional data source and is not available freely. 
Please contact ",(0,i.kt)("a",{parentName:"p",href:"mailto:annotation_support@illumina.com"},"annotation_support@illumina.com")," if you would like to obtain it."))),(0,i.kt)("h2",{id:"parse-omim-data"},"Parse OMIM data"),(0,i.kt)("p",null,"Illumina Connected Annotations uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Illumina Connected Annotations. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to an Illumina Connected Annotations gene symbol are further processed. The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols."),(0,i.kt)("h3",{id:"mim2genetxt"},"mim2gene.txt"),(0,i.kt)("p",null,"This mim2gene.txt (",(0,i.kt)("a",{parentName:"p",href:"http://omim.org/static/omim/data/mim2gene.txt"},"http://omim.org/static/omim/data/mim2gene.txt"),") file provides the mapping between MIM numbers and gene symbols. 
An example of this file is given below:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"# MIM Number MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq) Entrez Gene ID (NCBI) Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)\n100050 predominantly phenotypes\n100070 phenotype 100329167\n100100 phenotype\n100200 predominantly phenotypes\n100300 phenotype\n100500 moved/removed\n100600 phenotype\n100640 gene 216 ALDH1A1 ENSG00000165092\n100650 gene/phenotype 217 ALDH2 ENSG00000111275\n100660 gene 218 ALDH3A1 ENSG00000108602\n100670 gene 219 ALDH1B1 ENSG00000137124\n100675 predominantly phenotypes\n100678 gene 39 ACAT2 ENSG00000120437\n")),(0,i.kt)("p",null,'The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns are used to find the proper gene symbol supported by Illumina Connected Annotations, which may or may not be the same as the gene symbol listed here.'),(0,i.kt)("h3",{id:"omim-api"},"OMIM API"),(0,i.kt)("p",null,"Illumina Connected Annotations retrieves the OMIM annotations from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.omim.org/api"},"OMIM API"),' JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. 
A sample JSON response from the API is provided there.'),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "omim": {\n "version": "1.0",\n "entryList": [\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 100640,\n "status": "live",\n "titles": {\n "preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",\n "alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\\nACETALDEHYDE DEHYDROGENASE 1;;\\nALDH, LIVER CYTOSOLIC;;\\nRETINAL DEHYDROGENASE 1; RALDH1"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. 
ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 7709,\n "chromosome": 9,\n "chromosomeSymbol": "9",\n "chromosomeSort": 225,\n "chromosomeLocationStart": 72900670,\n "chromosomeLocationEnd": 72953052,\n "transcript": "ENST00000297785.7",\n "cytoLocation": "9q21",\n "computedCytoLocation": "9q21.13",\n "mimNumber": 100640,\n "geneSymbols": "ALDH1A1",\n "geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",\n "mappingMethod": "REa, A",\n "confidence": "P",\n "mouseGeneSymbol": "Aldh1a1",\n "mouseMgiID": "MGI:1353450",\n "geneInheritance": null\n },\n "externalLinks": {\n "geneIDs": "216",\n "hgncID": "402",\n "ensemblIDs": "ENSG00000165092,ENST00000297785.8",\n "approvedGeneSymbols": "ALDH1A1",\n "ncbiReferenceSequences": "1519246465",\n "proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",\n "uniGenes": "Hs.76392",\n "swissProtIDs": "P00352",\n "decipherGene": false,\n "umlsIDs": "C1412333",\n "gtr": true,\n "cmgGene": false,\n "keggPathways": true,\n "gwasCatalog": false,\n\n }\n }\n },\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 102560,\n "status": "live",\n "titles": {\n "preferredTitle": "ACTIN, GAMMA-1; ACTG1",\n "alternativeTitles": "ACTIN, GAMMA; ACTG;;\\nCYTOSKELETAL GAMMA-ACTIN;;\\nACTIN, CYTOPLASMIC, 2"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. 
Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 13666,\n "chromosome": 17,\n "chromosomeSymbol": "17",\n "chromosomeSort": 947,\n "chromosomeLocationStart": 81509970,\n "chromosomeLocationEnd": 81512798,\n "transcript": "ENST00000331925.7",\n "cytoLocation": "17q25.3",\n "computedCytoLocation": "17q25.3",\n "mimNumber": 102560,\n "geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",\n "geneName": "Actin, gamma-1",\n "mappingMethod": "REa, A, Fd",\n "confidence": "C",\n "mouseGeneSymbol": "Actg1",\n "mouseMgiID": "MGI:87906",\n "geneInheritance": null,\n "phenotypeMapList": [\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Baraitser-Winter syndrome 2",\n "phenotypeMimNumber": 614583,\n "phenotypicSeriesNumber": "PS243310",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n },\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Deafness, autosomal dominant 20/26",\n "phenotypeMimNumber": 604717,\n "phenotypicSeriesNumber": "PS124900",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n }\n ]\n }\n }\n }\n ]\n }\n}\n')),(0,i.kt)("p",null,"Content from the OMIM API JSON response is reorganized as shown in the Illumina Connected Annotations ",(0,i.kt)("a",{parentName:"p",href:"#json-output"},"JSON Output")),(0,i.kt)("p",null,"Mappings between the Illumina Connected Annotations JSON output and OMIM JSON API are listed in the table below:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Illumina Connected Annotations JSON key chain"),(0,i.kt)("th",{parentName:"tr",align:"left"},"OMIM 
API JSON key chain"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:geneName")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (",(0,i.kt)("a",{parentName:"td",href:"#mapping-key-to-content"},"see mapping 
below"),")")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:inheritances"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (",(0,i.kt)("a",{parentName:"td",href:"#phenotype-character-to-comment"},"see mapping below"),")")))),(0,i.kt)("h4",{id:"mapping-key-to-content"},"Mapping key to content"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"1")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"2")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disease phenotype itself was mapped"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"3")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"molecular basis of the disorder is known"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"4")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder is a chromosome deletion or duplication syndrome"),(0,i.kt)("br",null)),(0,i.kt)("h4",{id:"phenotype-character-to-comment"},"Phenotype character to comment"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"?")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"unconfirmed or possibly spurious mapping"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"["),"/",(0,i.kt)("inlineCode",{parentName:"p"},"]")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"nondiseases"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"{"),"/",(0,i.kt)("inlineCode",{parentName:"p"},"}")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"contribute to susceptibility to multifactorial disorders or to susceptibility to 
infection"),(0,i.kt)("br",null)),(0,i.kt)("h3",{id:"remove-links-in-omim-descriptions"},"Remove links in OMIM descriptions"),(0,i.kt)("p",null,"There are different types of link in the OMIM description section. For example, in above JSON response, we have the description of MIM entry 100640:"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).")),(0,i.kt)("p",null,"As the descriptions will be shown as plain text, we remove the curry brackets surrounding links and try to make the text still readable with minimal modifications. Briefly:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},'Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.'),(0,i.kt)("li",{parentName:"ul"},'Links referring to a literature reference will be processed to remove the internal index and curry brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".'),(0,i.kt)("li",{parentName:"ul"},'All the other links will simple have their curry brackets removed. 
For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".'),(0,i.kt)("li",{parentName:"ul"},'If the content within a pair of parentheses becomes empty after being processed, the parentheses need to be removed as well and its surrounding white spaces should be properly processed. For example, "ALDH2 ({100650})," will become "ALDH2,".')),(0,i.kt)("p",null,"Here is a list of examples about how the description section supposed to be processed:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Original text"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Processed text"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"({516030}, {516040}, and {516050})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1, {168461}; D2, {123833}; D3, {123834})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1; D2; D3)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2, {125645})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., see {102700}, {300755})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH). 
See also liver mitochondrial ALDH2")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A; {601011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1; {138359}), mu (e.g., {138350})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1), mu")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB; see {164011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G, {147574})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; {EC 2.7.1.74}; {125450})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; EC 2.7.1.74)")))),(0,i.kt)("h2",{id:"json-output"},"JSON output"),(0,i.kt)(o.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The first step in builing the OMIM ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," files is to use the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"downloadOMIM")," to download the necessary data. In order to download the data the user must possess an API key obtained from OMIM. 
This key has to be set as the environment variable ",(0,i.kt)("em",{parentName:"p"},"OmimApiKey"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},'export OmimApiKey=\nSAUtils.dll downloadOMIM\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll downloadomim [options]\nDownload the OMIM gene annotation data\n\nOPTIONS:\n --cache, -c \n input cache directory\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll downloadOMIM --ref References/7/Homo_sapiens.GRCh38.Nirvana.dat --uga Cache/ --out ExternalDataSources/OMIM/2021-06-14\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nGene Symbol Update Statistics\n============================================\n{\n "NumGeneSymbolsUpToDate": 16788,\n "NumGeneSymbolsUpdated": 95,\n "NumGenesWhereBothIdsAreNull": 0,\n "NumGeneSymbolsNotInCache": 106,\n "NumResolvedGeneSymbolConflicts": 15,\n "NumUnresolvedGeneSymbolConflicts": 0\n}\n\nTime: 00:04:08.9\n')),(0,i.kt)("p",null,"Once the download has succeeded, the ",(0,i.kt)("inlineCode",{parentName:"p"},"nga")," files can be produced using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"omim"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll omim\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, 
Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll omim [options]\nCreates a gene annotation database from OMIM data\n\nOPTIONS:\n --m2g, -m MimToGeneSymbol tsv file\n --json, -j OMIM entry json file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\n\ndotnet SAUtils.dll omim --m2g ExternalDataSources/OMIM/2021-06-14/MimToGeneSymbol.tsv --json ExternalDataSources/OMIM/2021-06-14/MimEntries.json.gz --out SupplementaryDatabase/63/\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\n\nTime: 00:00:04.5\n")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5b7bb28d.79f6e2d6.js b/assets/js/5b7bb28d.79f6e2d6.js new file mode 100644 index 00000000..c24f66d6 --- /dev/null +++ b/assets/js/5b7bb28d.79f6e2d6.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8943,216],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>h});var a=n(7294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},p=function(e){var 
t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=m(n),u=i,h=d["".concat(s,".").concat(u)]||d[u]||c[u]||o;return n?a.createElement(h,r(r({ref:t},p),{},{components:n})):a.createElement(h,r({ref:t},p))}));function h(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var o=n.length,r=new Array(o);r[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:i,r[1]=l;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>d,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(7462),i=(n(7294),n(3905));const o={},r=void 0,l={unversionedId:"data-sources/omim-json",id:"data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/omim-json.md",tags:[],version:"current",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],m={toc:s},p="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. 
coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n 
}\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene name")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry 
below"))))),(0,i.kt)("h4",{id:"phenotype"},"Phenotype"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#inheritance"},"possible values below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,i.kt)("h4",{id:"mapping"},"Mapping"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("li",{parentName:"ol"},"disease phenotype itself was 
mapped"),(0,i.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,i.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,i.kt)("h4",{id:"inheritance"},"Inheritance"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,i.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,i.kt)("h4",{id:"comments"},"Comments"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,i.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,i.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}d.isMDXComponent=!0},1927:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>c,frontMatter:()=>r,metadata:()=>s,toc:()=>m});var a=n(7462),i=(n(7294),n(3905)),o=n(8010);const r={title:"OMIM"},l=void 0,s={unversionedId:"data-sources/omim",id:"data-sources/omim",title:"OMIM",description:"Overview",source:"@site/docs/data-sources/omim.mdx",sourceDirName:"data-sources",slug:"/data-sources/omim",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/omim.mdx",tags:[],version:"current",frontMatter:{title:"OMIM"},sidebar:"docs",previous:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap"},next:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Parse OMIM data",id:"parse-omim-data",children:[{value:"mim2gene.txt",id:"mim2genetxt",children:[],level:3},{value:"OMIM API",id:"omim-api",children:[{value:"Mapping key to content",id:"mapping-key-to-content",children:[],level:4},{value:"Phenotype character to comment",id:"phenotype-character-to-comment",children:[],level:4}],level:3},{value:"Remove links in OMIM 
descriptions",id:"remove-links-in-omim-descriptions",children:[],level:3}],level:2},{value:"JSON output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[],level:2}],p={toc:m},d="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publications")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/30445645/"},"30445645"),"."),(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM\xae), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. 
PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/25428349/"},"25428349"),"."))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Professional data source")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"This is a Professional data source and is not available freely. Please contact ",(0,i.kt)("a",{parentName:"p",href:"mailto:annotation_support@illumina.com"},"annotation_support@illumina.com")," if you would like to obtain it."))),(0,i.kt)("h2",{id:"parse-omim-data"},"Parse OMIM data"),(0,i.kt)("p",null,"Illumina Connected Annotations uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Illumina Connected Annotations. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to an Illumina Connected Annotations gene symbol are further processed. 
The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols."),(0,i.kt)("h3",{id:"mim2genetxt"},"mim2gene.txt"),(0,i.kt)("p",null,"This mim2gene.txt (",(0,i.kt)("a",{parentName:"p",href:"http://omim.org/static/omim/data/mim2gene.txt"},"http://omim.org/static/omim/data/mim2gene.txt"),") file provides the mapping between MIM numbers and gene symbols. An example of this file is given below:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"# MIM Number MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq) Entrez Gene ID (NCBI) Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)\n100050 predominantly phenotypes\n100070 phenotype 100329167\n100100 phenotype\n100200 predominantly phenotypes\n100300 phenotype\n100500 moved/removed\n100600 phenotype\n100640 gene 216 ALDH1A1 ENSG00000165092\n100650 gene/phenotype 217 ALDH2 ENSG00000111275\n100660 gene 218 ALDH3A1 ENSG00000108602\n100670 gene 219 ALDH1B1 ENSG00000137124\n100675 predominantly phenotypes\n100678 gene 39 ACAT2 ENSG00000120437\n")),(0,i.kt)("p",null,'The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns are used to find the proper gene symbol supported by Illumina Connected Annotations, which may or may not be the same as the gene symbol listed here.'),(0,i.kt)("h3",{id:"omim-api"},"OMIM API"),(0,i.kt)("p",null,"Illumina Connected Annotations retrieves the OMIM annotations from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.omim.org/api"},"OMIM API"),' JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. 
A sample JSON response from the API is provided there.'),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "omim": {\n "version": "1.0",\n "entryList": [\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 100640,\n "status": "live",\n "titles": {\n "preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",\n "alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\\nACETALDEHYDE DEHYDROGENASE 1;;\\nALDH, LIVER CYTOSOLIC;;\\nRETINAL DEHYDROGENASE 1; RALDH1"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. 
ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 7709,\n "chromosome": 9,\n "chromosomeSymbol": "9",\n "chromosomeSort": 225,\n "chromosomeLocationStart": 72900670,\n "chromosomeLocationEnd": 72953052,\n "transcript": "ENST00000297785.7",\n "cytoLocation": "9q21",\n "computedCytoLocation": "9q21.13",\n "mimNumber": 100640,\n "geneSymbols": "ALDH1A1",\n "geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",\n "mappingMethod": "REa, A",\n "confidence": "P",\n "mouseGeneSymbol": "Aldh1a1",\n "mouseMgiID": "MGI:1353450",\n "geneInheritance": null\n },\n "externalLinks": {\n "geneIDs": "216",\n "hgncID": "402",\n "ensemblIDs": "ENSG00000165092,ENST00000297785.8",\n "approvedGeneSymbols": "ALDH1A1",\n "ncbiReferenceSequences": "1519246465",\n "proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",\n "uniGenes": "Hs.76392",\n "swissProtIDs": "P00352",\n "decipherGene": false,\n "umlsIDs": "C1412333",\n "gtr": true,\n "cmgGene": false,\n "keggPathways": true,\n "gwasCatalog": false,\n\n }\n }\n },\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 102560,\n "status": "live",\n "titles": {\n "preferredTitle": "ACTIN, GAMMA-1; ACTG1",\n "alternativeTitles": "ACTIN, GAMMA; ACTG;;\\nCYTOSKELETAL GAMMA-ACTIN;;\\nACTIN, CYTOPLASMIC, 2"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. 
Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 13666,\n "chromosome": 17,\n "chromosomeSymbol": "17",\n "chromosomeSort": 947,\n "chromosomeLocationStart": 81509970,\n "chromosomeLocationEnd": 81512798,\n "transcript": "ENST00000331925.7",\n "cytoLocation": "17q25.3",\n "computedCytoLocation": "17q25.3",\n "mimNumber": 102560,\n "geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",\n "geneName": "Actin, gamma-1",\n "mappingMethod": "REa, A, Fd",\n "confidence": "C",\n "mouseGeneSymbol": "Actg1",\n "mouseMgiID": "MGI:87906",\n "geneInheritance": null,\n "phenotypeMapList": [\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Baraitser-Winter syndrome 2",\n "phenotypeMimNumber": 614583,\n "phenotypicSeriesNumber": "PS243310",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n },\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Deafness, autosomal dominant 20/26",\n "phenotypeMimNumber": 604717,\n "phenotypicSeriesNumber": "PS124900",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n }\n ]\n }\n }\n }\n ]\n }\n}\n')),(0,i.kt)("p",null,"Content from the OMIM API JSON response is reorganized as shown in the Illumina Connected Annotations ",(0,i.kt)("a",{parentName:"p",href:"#json-output"},"JSON Output")),(0,i.kt)("p",null,"Mappings between the Illumina Connected Annotations JSON output and OMIM JSON API are listed in the table below:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Illumina Connected Annotations JSON key chain"),(0,i.kt)("th",{parentName:"tr",align:"left"},"OMIM 
API JSON key chain"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:geneName")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (",(0,i.kt)("a",{parentName:"td",href:"#mapping-key-to-content"},"see mapping 
below"),")")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:inheritances"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (",(0,i.kt)("a",{parentName:"td",href:"#phenotype-character-to-comment"},"see mapping below"),")")))),(0,i.kt)("h4",{id:"mapping-key-to-content"},"Mapping key to content"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"1")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"2")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disease phenotype itself was mapped"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"3")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"molecular basis of the disorder is known"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"4")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder is a chromosome deletion or duplication syndrome"),(0,i.kt)("br",null)),(0,i.kt)("h4",{id:"phenotype-character-to-comment"},"Phenotype character to comment"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"?")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"unconfirmed or possibly spurious mapping"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"["),"/",(0,i.kt)("inlineCode",{parentName:"p"},"]")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"nondiseases"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"{"),"/",(0,i.kt)("inlineCode",{parentName:"p"},"}")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"contribute to susceptibility to multifactorial disorders or to susceptibility to 
infection"),(0,i.kt)("br",null)),(0,i.kt)("h3",{id:"remove-links-in-omim-descriptions"},"Remove links in OMIM descriptions"),(0,i.kt)("p",null,"There are different types of link in the OMIM description section. For example, in above JSON response, we have the description of MIM entry 100640:"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).")),(0,i.kt)("p",null,"As the descriptions will be shown as plain text, we remove the curry brackets surrounding links and try to make the text still readable with minimal modifications. Briefly:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},'Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.'),(0,i.kt)("li",{parentName:"ul"},'Links referring to a literature reference will be processed to remove the internal index and curry brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".'),(0,i.kt)("li",{parentName:"ul"},'All the other links will simple have their curry brackets removed. 
For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".'),(0,i.kt)("li",{parentName:"ul"},'If the content within a pair of parentheses becomes empty after being processed, the parentheses need to be removed as well and its surrounding white spaces should be properly processed. For example, "ALDH2 ({100650})," will become "ALDH2,".')),(0,i.kt)("p",null,"Here is a list of examples about how the description section supposed to be processed:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Original text"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Processed text"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"({516030}, {516040}, and {516050})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1, {168461}; D2, {123833}; D3, {123834})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1; D2; D3)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2, {125645})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., see {102700}, {300755})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH). 
See also liver mitochondrial ALDH2")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A; {601011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1; {138359}), mu (e.g., {138350})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1), mu")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB; see {164011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G, {147574})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; {EC 2.7.1.74}; {125450})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; EC 2.7.1.74)")))),(0,i.kt)("h2",{id:"json-output"},"JSON output"),(0,i.kt)(o.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The first step in builing the OMIM ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," files is to use the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"downloadOMIM")," to download the necessary data. In order to download the data the user must possess an API key obtained from OMIM. 
This key has to be set as the environment variable ",(0,i.kt)("em",{parentName:"p"},"OmimApiKey"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},'export OmimApiKey=\nSAUtils.dll downloadOMIM\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll downloadomim [options]\nDownload the OMIM gene annotation data\n\nOPTIONS:\n --cache, -c \n input cache directory\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll downloadOMIM --ref References/7/Homo_sapiens.GRCh38.Nirvana.dat --uga Cache/ --out ExternalDataSources/OMIM/2021-06-14\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nGene Symbol Update Statistics\n============================================\n{\n "NumGeneSymbolsUpToDate": 16788,\n "NumGeneSymbolsUpdated": 95,\n "NumGenesWhereBothIdsAreNull": 0,\n "NumGeneSymbolsNotInCache": 106,\n "NumResolvedGeneSymbolConflicts": 15,\n "NumUnresolvedGeneSymbolConflicts": 0\n}\n\nTime: 00:04:08.9\n')),(0,i.kt)("p",null,"Once the download has succeeded, the ",(0,i.kt)("inlineCode",{parentName:"p"},"nga")," files can be produced using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"omim"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll omim\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, 
Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll omim [options]\nCreates a gene annotation database from OMIM data\n\nOPTIONS:\n --m2g, -m MimToGeneSymbol tsv file\n --json, -j OMIM entry json file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\n\ndotnet SAUtils.dll omim --m2g ExternalDataSources/OMIM/2021-06-14/MimToGeneSymbol.tsv --json ExternalDataSources/OMIM/2021-06-14/MimEntries.json.gz --out SupplementaryDatabase/63/\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\n\nTime: 00:00:04.5\n")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5c85804c.4061821a.js b/assets/js/5c85804c.4061821a.js deleted file mode 100644 index c8db4603..00000000 --- a/assets/js/5c85804c.4061821a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7583],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>g});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var 
n=c(e.components);return a.createElement(l.Provider,{value:n},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},u=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,r=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),d=c(t),u=i,g=d["".concat(l,".").concat(u)]||d[u]||m[u]||r;return t?a.createElement(g,o(o({ref:n},p),{},{components:t})):a.createElement(g,o({ref:n},p))}));function g(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,o=new Array(r);o[0]=u;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[d]="string"==typeof e?e:i,o[1]=s;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>s,toc:()=>l});var a=t(87462),i=(t(67294),t(3905));const r={title:"Gene Fusion Detection"},o=void 0,s={unversionedId:"core-functionality/gene-fusions",id:"version-3.17/core-functionality/gene-fusions",title:"Gene Fusion Detection",description:"Overview",source:"@site/versioned_docs/version-3.17/core-functionality/gene-fusions.md",sourceDirName:"core-functionality",slug:"/core-functionality/gene-fusions",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/gene-fusions",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/core-functionality/gene-fusions.md",tags:[],version:"3.17",frontMatter:{title:"Gene Fusion Detection"},sidebar:"version-3.17/docs",previous:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/canonical-transcripts"},next:{title:"MNV Recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/mnv-recomposition"}},l=[{value:"Overview",id:"overview",children:[],level:2},{value:"Approach",id:"approach",children:[{value:"Variant 
Types",id:"variant-types",children:[],level:3},{value:"Criteria",id:"criteria",children:[],level:3}],level:2},{value:"ETV6/RUNX1 Example",id:"etv6runx1-example",children:[{value:"VCF",id:"vcf",children:[],level:3},{value:"JSON Output",id:"json-output",children:[{value:"Gene Fusion Data Sources",id:"gene-fusion-data-sources",children:[],level:4},{value:"Consequences",id:"consequences",children:[],level:4},{value:"Gene Fusions Section",id:"gene-fusions-section",children:[],level:4}],level:3}],level:2}],c={toc:l},p="wrapper";function d(e){let{components:n,...r}=e;return(0,i.kt)(p,(0,a.Z)({},c,r,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed."),(0,i.kt)("p",null,"Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana."),(0,i.kt)("p",null,"The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(59468).Z})),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 
3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. ",(0,i.kt)("a",{parentName:"p",href:"https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-015-0252-1"},"Landscape of gene fusions in epithelial cancers: seq and ye shall find"),". Genome Med 7, 129 (2015)"))),(0,i.kt)("h2",{id:"approach"},"Approach"),(0,i.kt)("p",null,"Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. Let's consider two transcripts, ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_014206.3")," (",(0,i.kt)("strong",{parentName:"p"},"TMEM258"),") and ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_013402.4")," (",(0,i.kt)("strong",{parentName:"p"},"FADS1"),"). Both of these genes are on the reverse strand in the genome. The vertical bar indicates the breakpoint where these transcripts are fused:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 transcripts",src:t(24012).Z})),(0,i.kt)("p",null,"The above explains where the transcripts are fused together, but it doesn't explain in which orientation. By using the directionality encoded in the translocation breakend, we can rearrange these two transcripts in four ways:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 gene fusions",src:t(53137).Z})),(0,i.kt)("p",null,"Only two of the combinations yields a fusion contains both the transcription start site (TSS) and the stop codon. 
In one case, we can even detect an in-frame gene fusion."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Interpreting translocation breakends")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"At first glance, translocation breakends are a bit daunting. However, once you understand how they work, they're actually quite simple. For more information, we recommend reading section 5.4 in the ",(0,i.kt)("a",{parentName:"p",href:"https://samtools.github.io/hts-specs/VCFv4.2.pdf"},"VCF 4.2 specification"),"."),(0,i.kt)("table",{parentName:"div"},(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"REF"),(0,i.kt)("th",{parentName:"tr",align:"left"},"ALT"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Meaning"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t[p["),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the right of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t]p]"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending left of p is joined after 
t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"]p]t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the left of p is joined before t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"[p[t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending right of p is joined before t")))))),(0,i.kt)("h3",{id:"variant-types"},"Variant Types"),(0,i.kt)("p",null,"Specifically we can identify gene fusions from the following structural variant types:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"deletions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"tandem_duplications (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"inversions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"translocation breakpoints (",(0,i.kt)("inlineCode",{parentName:"li"},"AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911["),") ")),(0,i.kt)("h3",{id:"criteria"},"Criteria"),(0,i.kt)("p",null,"The following criteria must be met for Nirvana to identify a gene fusion:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"After accounting for gene orientation and genomic rearrangements, both transcripts must have the same orientation"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must belong to different genes"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts cannot have a coding region that already overlaps without the variant (i.e. 
in cases where two genes naturally overlap, we don't want to call a gene fusion)")),(0,i.kt)("h2",{id:"etv6runx1-example"},"ETV6/RUNX1 Example"),(0,i.kt)("p",null,"ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). Patients with this translocation are associated with a good prognosis and excellent response to treatment."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sun C., Chang L., Zhu X. ",(0,i.kt)("a",{parentName:"p",href:"https://www.oncotarget.com/article/16367/text/"},"Pathogenesis of ETV6/RUNX1-positive childhood acute lymphoblastic leukemia and mechanisms underlying its relapse"),". Oncotarget. 2017; 8: 35445-35459"))),(0,i.kt)("h3",{id:"vcf"},"VCF"),(0,i.kt)("p",null,"Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\nchr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND\nchr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND\nchr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND\nchr21 36420865 . C [chr12:12026270[C . 
PASS SVTYPE=BND\n")),(0,i.kt)("p",null,"When you put these calls together, the resulting genomic rearrangement looks something like this:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(97798).Z})),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)("p",null,"The annotation for the first variant in the VCF looks like this:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{139,141-198,211,213-222}","{139,141-198,211,213-222}":!0},'{\n "chromosome": "chr12",\n "position": 12026270,\n "refAllele": "C",\n "altAlleles": [\n "[chr21:36420865[C"\n ],\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "12p13.2",\n "clingen": [\n {\n "chromosome": "12",\n "begin": 173786,\n "end": 34835837,\n "variantType": "copy_number_gain",\n "id": "nsv995956",\n "clinicalInterpretation": "pathogenic",\n "phenotypes": [\n "Decreased calvarial ossification",\n "Delayed gross motor development",\n "Feeding difficulties",\n "Frontal bossing",\n "Morphological abnormality of the central nervous system",\n "Patchy alopecia"\n ],\n "phenotypeIds": [\n "HP:0002007",\n "HP:0002011",\n "HP:0002194",\n "HP:0002232",\n "HP:0005474",\n "HP:0011968",\n "MedGen:C0232466",\n "MedGen:C1862862",\n "MedGen:CN001816",\n "MedGen:CN001820",\n "MedGen:CN001989",\n "MedGen:CN004852"\n ],\n "observedGains": 1,\n "validated": true\n }\n ],\n "variants": [\n {\n "vid": "12-12026270-C-[chr21:36420865[C",\n "chromosome": "chr12",\n "begin": 12026270,\n "end": 12026270,\n "isStructuralVariant": true,\n "refAllele": "C",\n "altAllele": "[chr21:36420865[C",\n "variantType": "translocation_breakend",\n "cosmicGeneFusions": [\n {\n "id": "COSF2245",\n "numSamples": 249,\n "geneSymbols": [\n "ETV6",\n "RUNX1"\n ],\n "hgvsr": "ENST00000396373.4(ETV6):r.1_1283::ENST00000300305.3(RUNX1):r.504_6222",\n "histologies": [\n {\n "name": "acute lymphoblastic B cell leukaemia",\n "numSamples": 169\n },\n {\n "name": "acute lymphoblastic leukaemia",\n "numSamples": 80\n }\n ],\n "sites": 
[\n {\n "name": "haematopoietic and lymphoid tissue",\n "numSamples": 249\n }\n ],\n "pubMedIds": [\n 7761424,\n 7780150,\n 8609706,\n 8751464,\n 8982044,\n 9067587,\n 9207408,\n 9226156,\n 9628428,\n 10463610,\n 10774753,\n 11091202,\n 12621238,\n 12661004,\n 12750722,\n 15104290,\n 15642392,\n 24557455,\n 26925663\n ]\n }\n ],\n "fusionCatcher": [\n {\n "genes": {\n "first": {\n "hgnc": "ETV6",\n "isOncogene": true\n },\n "second": {\n "hgnc": "RUNX1",\n "isOncogene": true\n }\n },\n "somaticSources": [\n "DepMap CCLE",\n "Cancer Genome Project",\n "ChimerKB 4.0",\n "ChimerPub 4.0",\n "ChimerSeq 4.0",\n "Known",\n "Mitelman DB",\n "OncoKB",\n "TICdb"\n ]\n }\n ],\n "transcripts": [\n {\n "transcript": "ENST00000396373.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "ENSG00000139083",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "ENST00000437180.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000437180.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000300305.3",\n "bioType": "protein_coding",\n "intron": 1,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000300305.3(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000482318.1",\n "bioType": "nonsense_mediated_decay",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000482318.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000486278.2",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000486278.2(RUNX1):r.?_-15+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000455571.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": 
"ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000455571.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000475045.2",\n "bioType": "protein_coding",\n "intron": 11,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000475045.2(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000416754.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000416754.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n }\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000379658.3"\n },\n {\n "transcript": "NM_001987.4",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "2120",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"\n }\n ],\n "isCanonical": true,\n "proteinId": "NP_001978.1"\n }\n ]\n }\n ]\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"descriptions of the 
",(0,i.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"exon that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"intron that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. 
MSH6")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA nomenclature")))),(0,i.kt)("h4",{id:"gene-fusion-data-sources"},"Gene Fusion Data Sources"),(0,i.kt)("p",null,"To provide more context to our gene fusions, we provide the following gene fusion data sources:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/cosmic"},"COSMIC")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/fusioncatcher"},"FusionCatcher"))),(0,i.kt)("h4",{id:"consequences"},"Consequences"),(0,i.kt)("p",null,"When a gene fusion is identified, we add the following Sequence Ontology consequence:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{3}","{3}":!0},' "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n')),(0,i.kt)("h4",{id:"gene-fusions-section"},"Gene Fusions Section"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"geneFusions")," section is contained within the object of the originating transcript. It will contain all the pairwise gene fusions that obey the criteria outline above. In the case of ",(0,i.kt)("inlineCode",{parentName:"p"},"ENST00000396373.4"),", there 7 other Ensembl transcripts that would produce a gene fusion. For ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4"),", there was only one transcript (",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4"),") that produce a gene fusion."),(0,i.kt)("p",null,"For each originating transcript, we report the following for each partner transcript:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"transcript ID"),(0,i.kt)("li",{parentName:"ul"},"gene ID"),(0,i.kt)("li",{parentName:"ul"},"HGNC gene symbol"),(0,i.kt)("li",{parentName:"ul"},"transcript bio type (e.g. 
protein_coding)"),(0,i.kt)("li",{parentName:"ul"},"intron or exon number containing the breakpoint"),(0,i.kt)("li",{parentName:"ul"},"HGVS RNA notation")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Before Nirvana 3.15, we provided HGVS coding notation. However, HGVS r. notation is more appropriate for these types fusion splicing events (see ",(0,i.kt)("a",{parentName:"p",href:"https://varnomen.hgvs.org/bg-material/consultation/svd-wg007"},"HGVS SVD-WG007"),")."))),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{8}","{8}":!0},' "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"\n }\n ],\n')),(0,i.kt)("p",null,"The HGVS RNA notation above indicates that the gene fusion starts with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4")," (RUNX1) until CDS position 58 and continues with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4")," (ETV6). 
",(0,i.kt)("inlineCode",{parentName:"p"},"1009+3367")," indicates that the fusion occurred 3367 bp within intron 2."))}d.isMDXComponent=!0},53137:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_GeneFusions-e5e3758ea9d2c07d3591e3801b2bf7e3.svg"},24012:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_Transcripts-fe1b9c6be1f7cbfefbce887f8cec5d58.svg"},97798:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/etv6-runx1-fusion-ec8f4312c9aca496bde0d6e2b1bbd50d.svg"},59468:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/gene-fusions-fig2-1cce8ac31b00465c8d36bdc47ec3309e.svg"}}]); \ No newline at end of file diff --git a/assets/js/5d1e2784.8efb0a61.js b/assets/js/5d1e2784.8efb0a61.js new file mode 100644 index 00000000..6bcd9cb5 --- /dev/null +++ b/assets/js/5d1e2784.8efb0a61.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1311],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>u});var a=n(7294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var 
n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),p=d(n),h=i,u=p["".concat(s,".").concat(h)]||p[h]||c[h]||r;return n?a.createElement(u,o(o({ref:t},m),{},{components:n})):a.createElement(u,o({ref:t},m))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:i,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(7462),i=(n(7294),n(3905));const r={title:"Mitochondrial Heteroplasmy"},o=void 0,l={unversionedId:"data-sources/mito-heteroplasmy",id:"data-sources/mito-heteroplasmy",title:"Mitochondrial Heteroplasmy",description:"Overview",source:"@site/docs/data-sources/mito-heteroplasmy.md",sourceDirName:"data-sources",slug:"/data-sources/mito-heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-heteroplasmy",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mito-heteroplasmy.md",tags:[],version:"current",frontMatter:{title:"Mitochondrial Heteroplasmy"},sidebar:"docs",previous:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad"},next:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"JSON File",id:"json-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Binning VRF Data",id:"binning-vrf-data",children:[],level:4},{value:"Pre-processing the Data",id:"pre-processing-the-data",children:[],level:4},{value:"Algorithm",id:"algorithm",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON 
Output",id:"json-output",children:[],level:2}],d={toc:s},m="wrapper";function p(e){let{components:t,...n}=e;return(0,i.kt)(m,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline."),(0,i.kt)("h2",{id:"json-file"},"JSON File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "T:C":{\n "ad":[\n 1,\n 1,\n 1,\n 1,\n 1,\n 1\n ],\n "allele_type":"alt",\n "vrf":[\n 0.002369668246445498,\n 0.0024937655860349127,\n 0.0016129032258064516,\n 0.0025188916876574307,\n 0.0022935779816513763,\n 0.002008032128514056\n ],\n "vrf_stats":{\n "kurtosis":38.889891511122556,\n "max":0.0025188916876574307,\n "mean":5.4052190471990743e-05,\n "min":0.0,\n "nobs":246,\n "skewness":6.346664692283075,\n "stdev":0.0003461416264750575,\n "variance":1.1981402557879823e-07\n }\n }\n}\n\n')),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the JSON file, we're mainly interested in the following keys:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"variant")," (i.e. 
",(0,i.kt)("inlineCode",{parentName:"li"},"T:C"),")"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ad")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"vrf")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"nobs")," (number of observations)")),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Adjusting for null observations")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The ",(0,i.kt)("inlineCode",{parentName:"p"},"nobs")," value indicates how many observations were made. Ideally this would have been represented in the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," arrays, but it's left as an exercise for the reader."))),(0,i.kt)("h4",{id:"binning-vrf-data"},"Binning VRF Data"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," (variant read frequency) array in the JSON object above is paired with with the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," array (allele depths) shown above."),(0,i.kt)("p",null,"The data in the JSON object has a crazy number of significant digits. This means that as the number of samples increase, this array will grow. 
To make this more future-proof, Illumina Connected Annotations bins everything according to 0.1% increments."),(0,i.kt)("p",null,"With the binned data, we end up having 775 distinct ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143."),(0,i.kt)("h4",{id:"pre-processing-the-data"},"Pre-processing the Data"),(0,i.kt)("p",null,"The JSON file is converted into a small TSV file that is embedded in Illumina Connected Annotations. Here is an example of the TSV file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS REF ALT VRF_BINS VRF_COUNTS\nchrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\nchrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\n")),(0,i.kt)("h4",{id:"algorithm"},"Algorithm"),(0,i.kt)("p",null,"Illumina Connected Annotations will calculate mitochondrial heteroplasmy data for every sample in the VCF. 
Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Percentiles")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Illumina Connected Annotations uses the ",(0,i.kt)("a",{parentName:"p",href:"https://en.wikipedia.org/wiki/Percentile"},"statistical definition of percentile")," (indicating the value below which a given percentage of observations in a group of observations falls). 
Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1)."))),(0,i.kt)("h2",{id:"download-url"},"Download URL"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unavailable")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The original data set is only available internally at Illumina at the moment."))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{14-17}","{14-17}":!0},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"one percentile for each variant frequency (each alternate allele)")))))}p.isMDXComponent=!0}}]); \ No newline at 
end of file diff --git a/assets/js/5d1e2784.da544a50.js b/assets/js/5d1e2784.da544a50.js deleted file mode 100644 index 4a931a77..00000000 --- a/assets/js/5d1e2784.da544a50.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1311],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>u});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),p=d(n),h=i,u=p["".concat(s,".").concat(h)]||p[h]||c[h]||r;return n?a.createElement(u,o(o({ref:t},m),{},{components:n})):a.createElement(u,o({ref:t},m))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:i,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={title:"Mitochondrial Heteroplasmy"},o=void 
0,l={unversionedId:"data-sources/mito-heteroplasmy",id:"data-sources/mito-heteroplasmy",title:"Mitochondrial Heteroplasmy",description:"Overview",source:"@site/docs/data-sources/mito-heteroplasmy.md",sourceDirName:"data-sources",slug:"/data-sources/mito-heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-heteroplasmy",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mito-heteroplasmy.md",tags:[],version:"current",frontMatter:{title:"Mitochondrial Heteroplasmy"},sidebar:"docs",previous:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad"},next:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"JSON File",id:"json-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Binning VRF Data",id:"binning-vrf-data",children:[],level:4},{value:"Pre-processing the Data",id:"pre-processing-the-data",children:[],level:4},{value:"Algorithm",id:"algorithm",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:s},m="wrapper";function p(e){let{components:t,...n}=e;return(0,i.kt)(m,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. 
The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline."),(0,i.kt)("h2",{id:"json-file"},"JSON File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "T:C":{\n "ad":[\n 1,\n 1,\n 1,\n 1,\n 1,\n 1\n ],\n "allele_type":"alt",\n "vrf":[\n 0.002369668246445498,\n 0.0024937655860349127,\n 0.0016129032258064516,\n 0.0025188916876574307,\n 0.0022935779816513763,\n 0.002008032128514056\n ],\n "vrf_stats":{\n "kurtosis":38.889891511122556,\n "max":0.0025188916876574307,\n "mean":5.4052190471990743e-05,\n "min":0.0,\n "nobs":246,\n "skewness":6.346664692283075,\n "stdev":0.0003461416264750575,\n "variance":1.1981402557879823e-07\n }\n }\n}\n\n')),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the JSON file, we're mainly interested in the following keys:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"variant")," (i.e. 
",(0,i.kt)("inlineCode",{parentName:"li"},"T:C"),")"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ad")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"vrf")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"nobs")," (number of observations)")),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Adjusting for null observations")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The ",(0,i.kt)("inlineCode",{parentName:"p"},"nobs")," value indicates how many observations were made. Ideally this would have been represented in the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," arrays, but it's left as an exercise for the reader."))),(0,i.kt)("h4",{id:"binning-vrf-data"},"Binning VRF Data"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," (variant read frequency) array in the JSON object above is paired with with the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," array (allele depths) shown above."),(0,i.kt)("p",null,"The data in the JSON object has a crazy number of significant digits. This means that as the number of samples increase, this array will grow. 
To make this more future-proof, Illumina Connected Annotations bins everything according to 0.1% increments."),(0,i.kt)("p",null,"With the binned data, we end up having 775 distinct ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143."),(0,i.kt)("h4",{id:"pre-processing-the-data"},"Pre-processing the Data"),(0,i.kt)("p",null,"The JSON file is converted into a small TSV file that is embedded in Illumina Connected Annotations. Here is an example of the TSV file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS REF ALT VRF_BINS VRF_COUNTS\nchrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\nchrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\n")),(0,i.kt)("h4",{id:"algorithm"},"Algorithm"),(0,i.kt)("p",null,"Illumina Connected Annotations will calculate mitochondrial heteroplasmy data for every sample in the VCF. 
Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Percentiles")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Illumina Connected Annotations uses the ",(0,i.kt)("a",{parentName:"p",href:"https://en.wikipedia.org/wiki/Percentile"},"statistical definition of percentile")," (indicating the value below which a given percentage of observations in a group of observations falls). 
Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1)."))),(0,i.kt)("h2",{id:"download-url"},"Download URL"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unavailable")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The original data set is only available internally at Illumina at the moment."))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{14-17}","{14-17}":!0},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"one percentile for each variant frequency (each alternate allele)")))))}p.isMDXComponent=!0}}]); \ No newline at 
end of file diff --git a/assets/js/5d851e34.130cd643.js b/assets/js/5d851e34.130cd643.js deleted file mode 100644 index 65d46aab..00000000 --- a/assets/js/5d851e34.130cd643.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7795,5160,8462],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>h});var n=a(67294);function i(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(i[a]=e[a]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(i[a]=e[a])}return i}var s=n.createContext({}),m=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},d=function(e){var t=m(e.components);return n.createElement(s.Provider,{value:t},e.children)},p="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),p=m(a),u=i,h=p["".concat(s,".").concat(u)]||p[u]||c[u]||r;return a?n.createElement(h,o(o({ref:t},d),{},{components:a})):n.createElement(h,o({ref:t},d))}));function h(e,t){var a=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=a.length,o=new Array(r);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:i,o[1]=l;for(var m=2;m{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const r={},o=void 
0,l={unversionedId:"data-sources/mitomap-small-variants-json",id:"data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-small-variants-json.md",tags:[],version:"current",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"status"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}p.isMDXComponent=!0},58898:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n 
}\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"end"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}p.isMDXComponent=!0},17763:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>l,metadata:()=>m,toc:()=>d});var n=a(87462),i=(a(67294),a(3905)),r=a(88181),o=a(58898);const l={title:"MITOMAP"},s=void 0,m={unversionedId:"data-sources/mitomap",id:"data-sources/mitomap",title:"MITOMAP",description:"Overview",source:"@site/docs/data-sources/mitomap.mdx",sourceDirName:"data-sources",slug:"/data-sources/mitomap",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap.mdx",tags:[],version:"current",frontMatter:{title:"MITOMAP"},sidebar:"docs",previous:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-heteroplasmy"},next:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Scraping HTML Pages",id:"scraping-html-pages",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Allele Parsing",id:"allele-parsing",children:[],level:4}],level:3}],level:2},{value:"PostgreSQL Dump File",id:"postgresql-dump-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[{value:"Small Variants",id:"small-variants",children:[],level:3},{value:"Structural Variants",id:"structural-variants",children:[],level:3}],level:2}],p={toc:d},c="wrapper";function u(e){let{components:t,...l}=e;return(0,i.kt)(c,(0,n.Z)({},p,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"MITOMAP provides a compendium of 
polymorphisms and mutations in human mitochondrial DNA."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. ",(0,i.kt)("em",{parentName:"p"},"Current Protocols in Bioinformatics")," 1(123):1.23.1-26 (2013). ",(0,i.kt)("a",{parentName:"p",href:"http://www.mitomap.org"},"http://www.mitomap.org")))),(0,i.kt)("h2",{id:"scraping-html-pages"},"Scraping HTML Pages"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"MITOMAP is unique in that it doesn't offer the data in a downloadable format. 
As a result, the annotation content in Illumina Connected Annotations is scraped from the following MITOMAP pages:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsControl"},"mtDNA Control Region Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsCoding"},"mtDNA Coding Region & RNA Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsRNA"},"Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsCodingControl"},"Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/DeletionsSingle"},"Reported mtDNA Deletions")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/InsertionsSimple"},"mtDNA Simple Insertions"))),(0,i.kt)("p",null,(0,i.kt)("img",{src:a(90059).Z})),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"Here's what the HTML code looks like:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-html"},"[\"582\",\"MT-TF\",\"Mitochondrial myopathy\",\"T582C\",\"tRNA Phe\",\"-\",\"+\",\"Reported\",\"72.90% \",\"0\",\"2\"],\n[\"583\",\"MT-TF\",\"MELAS / MM & EXIT\",\"G583A\",\"tRNA Phe\",\"-\",\"+\",\"Cfrm\",\"93.10% \",\"0\",\"3\"],\n")),(0,i.kt)("p",null,"We're mainly interested in the following columns (numbers indicate the HTML page 
above):"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Position",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Disease",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Nucleotide Change",(0,i.kt)("sup",null,"1,2")),(0,i.kt)("li",{parentName:"ul"},"Allele",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Homoplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Heteroplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Status",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"MitoTIP",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"GB Seqs FL(CR)",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Deletion Junction",(0,i.kt)("sup",null,"5")),(0,i.kt)("li",{parentName:"ul"},"Insert (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"Insert Point (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"References/Curated References",(0,i.kt)("sup",null,"1,2,3,4"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"MitoTIP")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The MitoTIP information is used to populate the ",(0,i.kt)("inlineCode",{parentName:"p"},"clinicalSignificance")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"scorePercentile"),' JSON keys. 
The "frequency alert" entries are skipped since it\'s not directly relevant to clinical significance.'))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Left alignment")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Variant Enumeration")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are ",(0,i.kt)("inlineCode",{parentName:"p"},"C-C(2-8)")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"A-AC or ACC"),". 
Alternate alleles containing IUPAC ambiguity codes are similarly enumerated."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Inversions")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"MITOMAP inversions are currently treated as MNVs."))),(0,i.kt)("h4",{id:"allele-parsing"},"Allele Parsing"),(0,i.kt)("p",null,"The following MITOMAP allele parsing conventions are supported:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"C123T"),(0,i.kt)("li",{parentName:"ul"},"16021_16022del"),(0,i.kt)("li",{parentName:"ul"},"8042del2"),(0,i.kt)("li",{parentName:"ul"},"C9537insC"),(0,i.kt)("li",{parentName:"ul"},"3902_3908invACCTTGC"),(0,i.kt)("li",{parentName:"ul"},"A-AC or ACC"),(0,i.kt)("li",{parentName:"ul"},"C-C(2-8)"),(0,i.kt)("li",{parentName:"ul"},"8042delAT")),(0,i.kt)("h2",{id:"postgresql-dump-file"},"PostgreSQL Dump File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;\n1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . 
Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177\n2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. 
The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534\n")),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"id"),(0,i.kt)("li",{parentName:"ul"},"nlmid")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Why not use the PostgreSQL file for everything?")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in."))),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 
1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Duplicated records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown."),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"For diseases and PubMed IDs, we take the union of the values in the duplicated records."),(0,i.kt)("li",{parentName:"ul"},"For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.")))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Skipped records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Records that represent an alternate notation of the original variant are skipped. 
Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped."))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"see ",(0,i.kt)("a",{parentName:"li",href:"#example"},"HTML Pages")," above"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/downloads/mitomap.dump.sql.gz"},"PostgreSQL dump file"))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("h3",{id:"small-variants"},"Small Variants"),(0,i.kt)(r.default,{mdxType:"SmallJSON"}),(0,i.kt)("h3",{id:"structural-variants"},"Structural Variants"),(0,i.kt)(o.default,{mdxType:"SVJSON"}))}u.isMDXComponent=!0},90059:(e,t,a)=>{a.d(t,{Z:()=>n});const n=a.p+"assets/images/MITOMAP-d8d4dd35c2336fdba5fcced77ec438e6.png"}}]); \ No newline at end of file diff --git a/assets/js/5d851e34.a3d5eaf5.js b/assets/js/5d851e34.a3d5eaf5.js new file mode 100644 index 00000000..ab459339 --- /dev/null +++ b/assets/js/5d851e34.a3d5eaf5.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7795,5160,8462],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>h});var n=a(7294);function i(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(i[a]=e[a]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(i[a]=e[a])}return i}var s=n.createContext({}),m=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},d=function(e){var t=m(e.components);return 
n.createElement(s.Provider,{value:t},e.children)},p="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),p=m(a),u=i,h=p["".concat(s,".").concat(u)]||p[u]||c[u]||r;return a?n.createElement(h,o(o({ref:t},d),{},{components:a})):n.createElement(h,o({ref:t},d))}));function h(e,t){var a=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=a.length,o=new Array(r);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:i,o[1]=l;for(var m=2;m{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(7462),i=(a(7294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-small-variants-json",id:"data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-small-variants-json.md",tags:[],version:"current",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n 
"pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"status"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"predicted 
pathogenicity")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MitoTIP score")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}p.isMDXComponent=!0},8898:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(7462),i=(a(7294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},s=[],m={toc:s},d="wrapper";function 
p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"end"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}p.isMDXComponent=!0},7763:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>l,metadata:()=>m,toc:()=>d});var n=a(7462),i=(a(7294),a(3905)),r=a(8181),o=a(8898);const l={title:"MITOMAP"},s=void 0,m={unversionedId:"data-sources/mitomap",id:"data-sources/mitomap",title:"MITOMAP",description:"Overview",source:"@site/docs/data-sources/mitomap.mdx",sourceDirName:"data-sources",slug:"/data-sources/mitomap",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap.mdx",tags:[],version:"current",frontMatter:{title:"MITOMAP"},sidebar:"docs",previous:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-heteroplasmy"},next:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Scraping HTML Pages",id:"scraping-html-pages",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Allele Parsing",id:"allele-parsing",children:[],level:4}],level:3}],level:2},{value:"PostgreSQL Dump File",id:"postgresql-dump-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[{value:"Small Variants",id:"small-variants",children:[],level:3},{value:"Structural Variants",id:"structural-variants",children:[],level:3}],level:2}],p={toc:d},c="wrapper";function u(e){let{components:t,...l}=e;return(0,i.kt)(c,(0,n.Z)({},p,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"MITOMAP provides a compendium of polymorphisms 
and mutations in human mitochondrial DNA."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. ",(0,i.kt)("em",{parentName:"p"},"Current Protocols in Bioinformatics")," 1(123):1.23.1-26 (2013). ",(0,i.kt)("a",{parentName:"p",href:"http://www.mitomap.org"},"http://www.mitomap.org")))),(0,i.kt)("h2",{id:"scraping-html-pages"},"Scraping HTML Pages"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"MITOMAP is unique in that it doesn't offer the data in a downloadable format. 
As a result, the annotation content in Illumina Connected Annotations is scraped from the following MITOMAP pages:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsControl"},"mtDNA Control Region Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsCoding"},"mtDNA Coding Region & RNA Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsRNA"},"Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsCodingControl"},"Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/DeletionsSingle"},"Reported mtDNA Deletions")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/InsertionsSimple"},"mtDNA Simple Insertions"))),(0,i.kt)("p",null,(0,i.kt)("img",{src:a(5085).Z})),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"Here's what the HTML code looks like:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-html"},"[\"582\",\"MT-TF\",\"Mitochondrial myopathy\",\"T582C\",\"tRNA Phe\",\"-\",\"+\",\"Reported\",\"72.90% \",\"0\",\"2\"],\n[\"583\",\"MT-TF\",\"MELAS / MM & EXIT\",\"G583A\",\"tRNA Phe\",\"-\",\"+\",\"Cfrm\",\"93.10% \",\"0\",\"3\"],\n")),(0,i.kt)("p",null,"We're mainly interested in the following columns (numbers indicate the HTML page 
above):"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Position",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Disease",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Nucleotide Change",(0,i.kt)("sup",null,"1,2")),(0,i.kt)("li",{parentName:"ul"},"Allele",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Homoplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Heteroplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Status",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"MitoTIP",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"GB Seqs FL(CR)",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Deletion Junction",(0,i.kt)("sup",null,"5")),(0,i.kt)("li",{parentName:"ul"},"Insert (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"Insert Point (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"References/Curated References",(0,i.kt)("sup",null,"1,2,3,4"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"MitoTIP")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The MitoTIP information is used to populate the ",(0,i.kt)("inlineCode",{parentName:"p"},"clinicalSignificance")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"scorePercentile"),' JSON keys. 
The "frequency alert" entries are skipped since it\'s not directly relevant to clinical significance.'))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Left alignment")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Variant Enumeration")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are ",(0,i.kt)("inlineCode",{parentName:"p"},"C-C(2-8)")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"A-AC or ACC"),". 
Alternate alleles containing IUPAC ambiguity codes are similarly enumerated."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Inversions")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"MITOMAP inversions are currently treated as MNVs."))),(0,i.kt)("h4",{id:"allele-parsing"},"Allele Parsing"),(0,i.kt)("p",null,"The following MITOMAP allele parsing conventions are supported:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"C123T"),(0,i.kt)("li",{parentName:"ul"},"16021_16022del"),(0,i.kt)("li",{parentName:"ul"},"8042del2"),(0,i.kt)("li",{parentName:"ul"},"C9537insC"),(0,i.kt)("li",{parentName:"ul"},"3902_3908invACCTTGC"),(0,i.kt)("li",{parentName:"ul"},"A-AC or ACC"),(0,i.kt)("li",{parentName:"ul"},"C-C(2-8)"),(0,i.kt)("li",{parentName:"ul"},"8042delAT")),(0,i.kt)("h2",{id:"postgresql-dump-file"},"PostgreSQL Dump File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;\n1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . 
Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177\n2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. 
The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534\n")),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"id"),(0,i.kt)("li",{parentName:"ul"},"nlmid")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Why not use the PostgreSQL file for everything?")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in."))),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 
1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Duplicated records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown."),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"For diseases and PubMed IDs, we take the union of the values in the duplicated records."),(0,i.kt)("li",{parentName:"ul"},"For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.")))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Skipped records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Records that represent an alternate notation of the original variant are skipped. 
Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped."))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"see ",(0,i.kt)("a",{parentName:"li",href:"#example"},"HTML Pages")," above"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/downloads/mitomap.dump.sql.gz"},"PostgreSQL dump file"))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("h3",{id:"small-variants"},"Small Variants"),(0,i.kt)(r.default,{mdxType:"SmallJSON"}),(0,i.kt)("h3",{id:"structural-variants"},"Structural Variants"),(0,i.kt)(o.default,{mdxType:"SVJSON"}))}u.isMDXComponent=!0},5085:(e,t,a)=>{a.d(t,{Z:()=>n});const n=a.p+"assets/images/MITOMAP-d8d4dd35c2336fdba5fcced77ec438e6.png"}}]); \ No newline at end of file diff --git a/assets/js/5dd9300a.34f93084.js b/assets/js/5dd9300a.34f93084.js deleted file mode 100644 index 012cd0b1..00000000 --- a/assets/js/5dd9300a.34f93084.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8907,1912],{3905:(t,e,n)=>{n.d(e,{Zo:()=>p,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var d=a.createContext({}),s=function(t){var e=a.useContext(d),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},p=function(t){var e=s(t.components);return 
a.createElement(d.Provider,{value:e},t.children)},c="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},m=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,o=t.originalType,d=t.parentName,p=l(t,["components","mdxType","originalType","parentName"]),c=s(n),m=r,g=c["".concat(d,".").concat(m)]||c[m]||u[m]||o;return n?a.createElement(g,i(i({ref:e},p),{},{components:n})):a.createElement(g,i({ref:e},p))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var d in e)hasOwnProperty.call(e,d)&&(l[d]=e[d]);l.originalType=t,l[c]="string"==typeof t?t:r,i[1]=l;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>d});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/decipher-json",id:"data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/decipher-json.md",tags:[],version:"current",frontMatter:{}},d=[],s={toc:d},p="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(p,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed deletions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"total # of 
samples")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap")))))}c.isMDXComponent=!0},61389:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>d,toc:()=>s});var a=n(87462),r=(n(67294),n(3905)),o=n(94072);const i={title:"DECIPHER"},l=void 0,d={unversionedId:"data-sources/decipher",id:"data-sources/decipher",title:"DECIPHER",description:"Overview",source:"@site/docs/data-sources/decipher.mdx",sourceDirName:"data-sources",slug:"/data-sources/decipher",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/decipher.mdx",tags:[],version:"current",frontMatter:{title:"DECIPHER"},sidebar:"docs",previous:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp"},next:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher"}},s=[{value:"Overview",id:"overview",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[{value:"JSON output",id:"json-output",children:[],level:3}],level:2}],p={toc:s},c="wrapper";function 
u(t){let{components:e,...n}=t;return(0,r.kt)(c,(0,a.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/"},"DECIPHER")," (DatabasE of genomiC varIation and Phenotype in Humans using Ensembl Resources) is an interactive web-based database which incorporates a suite of tools designed to aid the interpretation of genomic variants."),(0,r.kt)("p",null,"DECIPHER enhances clinical diagnosis by retrieving information from a variety of bioinformatics resources relevant to the variant found in the patient. The patient's variant is displayed in the context of both normal variation and pathogenic variation reported at that locus thereby facilitating interpretation."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"DECIPHER: Database of Chromosomal Imbalance and Phenotype in Humans using Ensembl Resources. Firth, H.V. et al., 2009. 
Am.J.Hum.Genet 84, 524-533 (DOI: dx.doi.org/10/1016/j.ajhg.2009.03.010)"))),(0,r.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#population_cnv_id chr start end deletion_observations deletion_frequency deletion_standard_error duplication_observations duplication_frequency duplication_standard_error observations frequency standard_error type sample_size study\n1 1 10529 177368 0 0 1 3 0.075 0.555277708 3 0.075 0.555277708 1 40 42M calls\n2 1 13516 91073 0 0 1 27 0.675 0.109713431 27 0.675 0.109713431 1 40 42M calls\n3 1 18888 35451 0 0 1 2 0.002366864 0.706269473 2 0.002366864 0.706269473 1 845 DDD\n")),(0,r.kt)("h4",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"We parse the DECIPHER tsv file and extract the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"chr"),(0,r.kt)("li",{parentName:"ul"},"start"),(0,r.kt)("li",{parentName:"ul"},"end"),(0,r.kt)("li",{parentName:"ul"},"deletion_observations"),(0,r.kt)("li",{parentName:"ul"},"deletion_frequency"),(0,r.kt)("li",{parentName:"ul"},"duplication_observations"),(0,r.kt)("li",{parentName:"ul"},"duplication_frequency"),(0,r.kt)("li",{parentName:"ul"},"sample_size")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz"},"https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz"),"\n",(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/files/downloads/population_cnv_grch37.txt.gz"},"https://www.deciphergenomics.org/files/downloads/population_cnv_grch37.txt.gz")),(0,r.kt)("h3",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5dd9300a.9b67eb49.js b/assets/js/5dd9300a.9b67eb49.js new file mode 100644 index 00000000..d1858717 --- /dev/null +++ 
b/assets/js/5dd9300a.9b67eb49.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8907,1912],{3905:(t,e,n)=>{n.d(e,{Zo:()=>p,kt:()=>g});var a=n(7294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var d=a.createContext({}),s=function(t){var e=a.useContext(d),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},p=function(t){var e=s(t.components);return a.createElement(d.Provider,{value:e},t.children)},c="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},m=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,o=t.originalType,d=t.parentName,p=l(t,["components","mdxType","originalType","parentName"]),c=s(n),m=r,g=c["".concat(d,".").concat(m)]||c[m]||u[m]||o;return n?a.createElement(g,i(i({ref:e},p),{},{components:n})):a.createElement(g,i({ref:e},p))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var d in e)hasOwnProperty.call(e,d)&&(l[d]=e[d]);l.originalType=t,l[c]="string"==typeof t?t:r,i[1]=l;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>d});var a=n(7462),r=(n(7294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/decipher-json",id:"data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/decipher-json.md",tags:[],version:"current",frontMatter:{}},d=[],s={toc:d},p="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(p,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n }\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed 
deletions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"total # of samples")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% annotation overlap")))))}c.isMDXComponent=!0},1389:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>d,toc:()=>s});var a=n(7462),r=(n(7294),n(3905)),o=n(4072);const i={title:"DECIPHER"},l=void 0,d={unversionedId:"data-sources/decipher",id:"data-sources/decipher",title:"DECIPHER",description:"Overview",source:"@site/docs/data-sources/decipher.mdx",sourceDirName:"data-sources",slug:"/data-sources/decipher",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/decipher.mdx",tags:[],version:"current",frontMatter:{title:"DECIPHER"},sidebar:"docs",previous:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp"},next:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher"}},s=[{value:"Overview",id:"overview",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[{value:"JSON output",id:"json-output",children:[],level:3}],level:2}],p={toc:s},c="wrapper";function u(t){let{components:e,...n}=t;return(0,r.kt)(c,(0,a.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/"},"DECIPHER")," (DatabasE of genomiC varIation and Phenotype in Humans using Ensembl Resources) is an interactive web-based database which incorporates a suite of tools designed to aid the interpretation of genomic variants."),(0,r.kt)("p",null,"DECIPHER enhances clinical diagnosis by retrieving information from a variety of bioinformatics resources relevant to the variant found in the patient. 
The patient's variant is displayed in the context of both normal variation and pathogenic variation reported at that locus thereby facilitating interpretation."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"DECIPHER: Database of Chromosomal Imbalance and Phenotype in Humans using Ensembl Resources. Firth, H.V. et al., 2009. Am.J.Hum.Genet 84, 524-533 (DOI: dx.doi.org/10/1016/j.ajhg.2009.03.010)"))),(0,r.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#population_cnv_id chr start end deletion_observations deletion_frequency deletion_standard_error duplication_observations duplication_frequency duplication_standard_error observations frequency standard_error type sample_size study\n1 1 10529 177368 0 0 1 3 0.075 0.555277708 3 0.075 0.555277708 1 40 42M calls\n2 1 13516 91073 0 0 1 27 0.675 0.109713431 27 0.675 0.109713431 1 40 42M calls\n3 1 18888 35451 0 0 1 2 0.002366864 0.706269473 2 0.002366864 0.706269473 1 845 DDD\n")),(0,r.kt)("h4",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"We parse the DECIPHER tsv file and extract the following 
columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"chr"),(0,r.kt)("li",{parentName:"ul"},"start"),(0,r.kt)("li",{parentName:"ul"},"end"),(0,r.kt)("li",{parentName:"ul"},"deletion_observations"),(0,r.kt)("li",{parentName:"ul"},"deletion_frequency"),(0,r.kt)("li",{parentName:"ul"},"duplication_observations"),(0,r.kt)("li",{parentName:"ul"},"duplication_frequency"),(0,r.kt)("li",{parentName:"ul"},"sample_size")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz"},"https://www.deciphergenomics.org/files/downloads/population_cnv_grch38.txt.gz"),"\n",(0,r.kt)("a",{parentName:"p",href:"https://www.deciphergenomics.org/files/downloads/population_cnv_grch37.txt.gz"},"https://www.deciphergenomics.org/files/downloads/population_cnv_grch37.txt.gz")),(0,r.kt)("h3",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5e91274e.edc11bc3.js b/assets/js/5e91274e.edc11bc3.js deleted file mode 100644 index 36fc21c6..00000000 --- a/assets/js/5e91274e.edc11bc3.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9995],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>k});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),d=function(t){var 
e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=d(t.components);return r.createElement(p.Provider,{value:e},t.children)},m="mdxType",s={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,i=t.originalType,p=t.parentName,c=l(t,["components","mdxType","originalType","parentName"]),m=d(n),u=a,k=m["".concat(p,".").concat(u)]||m[u]||s[u]||i;return n?r.createElement(k,o(o({ref:e},c),{},{components:n})):r.createElement(k,o({ref:e},c))}));function k(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[m]="string"==typeof t?t:a,o[1]=l;for(var d=2;d{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const i={title:"Dependencies"},o=void 0,l={unversionedId:"introduction/dependencies",id:"version-3.18/introduction/dependencies",title:"Dependencies",description:"All of the following dependencies have been included in this repository.",source:"@site/versioned_docs/version-3.18/introduction/dependencies.md",sourceDirName:"introduction",slug:"/introduction/dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/dependencies",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/introduction/dependencies.md",tags:[],version:"3.18",frontMatter:{title:"Dependencies"},sidebar:"docs",previous:{title:"Introduction",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/"},next:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/getting-started"}},p=[],d={toc:p},c="wrapper";function 
m(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},d,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("p",null,"All of the following dependencies have been included in this repository."),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Name"),(0,a.kt)("th",{parentName:"tr",align:"center"},"License"),(0,a.kt)("th",{parentName:"tr",align:null},"Usage"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-extensions-for-dotnet-cli"},"Amazon.Lambda")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS extensions for .NET CLI")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-sdk-net/"},"AWSSDK")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS Lambda, S3, SNS support")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://www.newtonsoft.com/json"},"Json.NET")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"JASIX utility")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/ebiggers/libdeflate"},"libdeflate")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/moq/moq4"},"Moq")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"Mocking framework for unit 
tests")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"http://www.ndesk.org/Options"},"NDesk.Options")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT/X11"),(0,a.kt)("td",{parentName:"tr",align:null},"CommandLine library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/xunit/xunit"},"xUnit")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"Unit testing framework")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/Dead2/zlib-ng"},"zlib-ng")),(0,a.kt)("td",{parentName:"tr",align:"center"},"zlib"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/facebook/zstd"},"zstd")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5ef708da.aeb507f8.js b/assets/js/5ef708da.aeb507f8.js deleted file mode 100644 index d37b9de3..00000000 --- a/assets/js/5ef708da.aeb507f8.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8841],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return 
r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},m=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,c=i(t,["components","mdxType","originalType","parentName"]),s=u(n),m=r,g=s["".concat(p,".").concat(m)]||s[m]||d[m]||l;return n?a.createElement(g,o(o({ref:e},c),{},{components:n})):a.createElement(g,o({ref:e},c))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=m;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/clingen-json",id:"version-3.17/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clingen-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],u={toc:p},c="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(c,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n 
"begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of 
the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). 
Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/5f2579f8.f3191e4c.js b/assets/js/5f2579f8.f3191e4c.js deleted file mode 100644 index ca7eaff4..00000000 --- a/assets/js/5f2579f8.f3191e4c.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8706],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>h});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var n=c(e.components);return a.createElement(s.Provider,{value:n},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=c(t),m=i,h=d["".concat(s,".").concat(m)]||d[m]||u[m]||r;return t?a.createElement(h,o(o({ref:n},p),{},{components:t})):a.createElement(h,o({ref:n},p))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,o=new Array(r);o[0]=m;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[d]="string"==typeof e?e:i,o[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=t(87462),i=(t(67294),t(3905));const r={title:"Gene Fusion Detection"},o=void 0,l={unversionedId:"core-functionality/gene-fusions",id:"version-3.14/core-functionality/gene-fusions",title:"Gene Fusion 
Detection",description:"Overview",source:"@site/versioned_docs/version-3.14/core-functionality/gene-fusions.md",sourceDirName:"core-functionality",slug:"/core-functionality/gene-fusions",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/core-functionality/gene-fusions",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/core-functionality/gene-fusions.md",tags:[],version:"3.14",frontMatter:{title:"Gene Fusion Detection"},sidebar:"version-3.14/docs",previous:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/core-functionality/variant-ids"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Approach",id:"approach",children:[{value:"Variant Types",id:"variant-types",children:[],level:3},{value:"Criteria",id:"criteria",children:[],level:3}],level:2},{value:"ETV6/RUNX1 Example",id:"etv6runx1-example",children:[{value:"VCF",id:"vcf",children:[{value:"Interpreting translocation breakends",id:"interpreting-translocation-breakends",children:[],level:4},{value:"Visualization",id:"visualization",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output",children:[{value:"Consequences",id:"consequences",children:[],level:4},{value:"Introns & Exons",id:"introns--exons",children:[],level:4},{value:"HGVS coding notation",id:"hgvs-coding-notation",children:[],level:4}],level:3}],level:2}],c={toc:s},p="wrapper";function d(e){let{components:n,...r}=e;return(0,i.kt)(p,(0,a.Z)({},c,r,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. 
When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed."),(0,i.kt)("p",null,"Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana."),(0,i.kt)("p",null,"The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(96721).Z})),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. ",(0,i.kt)("a",{parentName:"p",href:"https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-015-0252-1"},"Landscape of gene fusions in epithelial cancers: seq and ye shall find"),". Genome Med 7, 129 (2015)"))),(0,i.kt)("h2",{id:"approach"},"Approach"),(0,i.kt)("p",null,"Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. 
"),(0,i.kt)("p",null,"For each originating transcript, we report the following:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"originating intron or exon number"),(0,i.kt)("li",{parentName:"ul"},"for each partner transcript fused with the originating transcript, we report:",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"HGVS coding notation"),(0,i.kt)("li",{parentName:"ul"},"partner intron or exon number")))),(0,i.kt)("h3",{id:"variant-types"},"Variant Types"),(0,i.kt)("p",null,"Specifically we can identify gene fusions from the following structural variant types:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"deletions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"tandem_duplications (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"inversions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"translocation breakpoints (",(0,i.kt)("inlineCode",{parentName:"li"},"AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911["),") ")),(0,i.kt)("h3",{id:"criteria"},"Criteria"),(0,i.kt)("p",null,"The following criteria must be met for Nirvana to identify a gene fusion:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"Both transcripts must possess a coding region"),(0,i.kt)("li",{parentName:"ol"},"After accounting for genomic rearrangements, both transcripts must have the same orientation"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must belong to different genes"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts cannot have a coding region that already overlaps without the variant (i.e. 
in cases where two genes naturally overlap, we don't want to call a gene fusion)")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"UTR overlap")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"In the past, we also required that the coding regions from the two genes intersected. However, in oncology literature, there are many documented gene fusions where only the UTRs overlap. As a result, we adjusted our algorithm to allow for UTR overlaps as well."))),(0,i.kt)("h2",{id:"etv6runx1-example"},"ETV6/RUNX1 Example"),(0,i.kt)("p",null,"ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). 
Patients with this translocation are associated with a good prognosis and excellent response to treatment."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sun C., Chang L., Zhu X. ",(0,i.kt)("a",{parentName:"p",href:"https://www.oncotarget.com/article/16367/text/"},"Pathogenesis of ETV6/RUNX1-positive childhood acute lymphoblastic leukemia and mechanisms underlying its relapse"),". Oncotarget. 2017; 8: 35445-35459"))),(0,i.kt)("h3",{id:"vcf"},"VCF"),(0,i.kt)("p",null,"Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\nchr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND\nchr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND\nchr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND\nchr21 36420865 . C [chr12:12026270[C . 
PASS SVTYPE=BND\n")),(0,i.kt)("h4",{id:"interpreting-translocation-breakends"},"Interpreting translocation breakends"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"REF"),(0,i.kt)("th",{parentName:"tr",align:"left"},"ALT"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Meaning"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t[p["),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the right of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t]p]"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending left of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"]p]t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the left of p is joined before t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"[p[t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending right of p is joined before t")))),(0,i.kt)("h4",{id:"visualization"},"Visualization"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(88476).Z})),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)("p",null,"The annotation for the first variant in the VCF looks like this:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{66,68-100,113,115-123}","{66,68-100,113,115-123}":!0},' {\n "chromosome": "chr12",\n "position": 12026270,\n "refAllele": "C",\n "altAlleles": [\n "[chr21:36420865[C"\n ],\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "12p13.2",\n "clingen": [\n {\n "chromosome": "12",\n "begin": 
173786,\n "end": 34835837,\n "variantType": "copy_number_gain",\n "id": "nsv995956",\n "clinicalInterpretation": "pathogenic",\n "phenotypes": [\n "Decreased calvarial ossification",\n "Delayed gross motor development",\n "Feeding difficulties",\n "Frontal bossing",\n "Morphological abnormality of the central nervous system",\n "Patchy alopecia"\n ],\n "phenotypeIds": [\n "HP:0002007",\n "HP:0002011",\n "HP:0002194",\n "HP:0002232",\n "HP:0005474",\n "HP:0011968",\n "MedGen:C0232466",\n "MedGen:C1862862",\n "MedGen:CN001816",\n "MedGen:CN001820",\n "MedGen:CN001989",\n "MedGen:CN004852"\n ],\n "observedGains": 1,\n "validated": true\n }\n ],\n "variants": [\n {\n "vid": "12-12026270-C-[chr21:36420865[C",\n "chromosome": "chr12",\n "begin": 12026270,\n "end": 12026270,\n "isStructuralVariant": true,\n "refAllele": "C",\n "altAllele": "[chr21:36420865[C",\n "variantType": "translocation_breakend",\n "transcripts": [\n {\n "transcript": "ENST00000396373.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "ENSG00000139083",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusion": {\n "intron": 5,\n "fusions": [\n {\n "hgvsc": "RUNX1{ENST00000437180.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 2\n },\n {\n "hgvsc": "RUNX1{ENST00000300305.3}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 1\n },\n {\n "hgvsc": "RUNX1{ENST00000482318.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 2\n },\n {\n "hgvsc": "RUNX1{ENST00000486278.2}:c.?_156195_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 2\n },\n {\n "hgvsc": "RUNX1{ENST00000455571.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 2\n },\n {\n "hgvsc": "RUNX1{ENST00000475045.2}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 11\n },\n {\n "hgvsc": 
"RUNX1{ENST00000416754.1}:c.1_58+274_ETV6{ENST00000396373.4}:c.1009+3367_1359",\n "intron": 2\n }\n ]\n },\n "isCanonical": true,\n "proteinId": "ENSP00000379658.3"\n },\n {\n "transcript": "NM_001987.4",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "2120",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusion": {\n "intron": 5,\n "fusions": [\n {\n "hgvsc": "RUNX1{NM_001754.4}:c.1_58+274_ETV6{NM_001987.4}:c.1009+3367_1359",\n "intron": 2\n }\n ]\n },\n "isCanonical": true,\n "proteinId": "NP_001978.1"\n }\n ]\n }\n ]\n }\n')),(0,i.kt)("h4",{id:"consequences"},"Consequences"),(0,i.kt)("p",null,"When a gene fusion is identified, we add the following Sequence Ontology consequence:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{3}","{3}":!0},' "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n')),(0,i.kt)("h4",{id:"introns--exons"},"Introns & Exons"),(0,i.kt)("p",null,"In this section we describe all the pairwise gene fusions that obey the criteria outlined above. In the case of ",(0,i.kt)("inlineCode",{parentName:"p"},"ENST00000396373.4"),", there 7 other Ensembl transcripts that would produce a gene fusion. For ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4"),", there was only one transcript (",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4"),") that produce a gene fusion."),(0,i.kt)("p",null,"In each case, Nirvana outputs which intron or exon contained the breakpoint in both of the transcripts that form the gene fusion."),(0,i.kt)("h4",{id:"hgvs-coding-notation"},"HGVS coding notation"),(0,i.kt)("p",null,"Finally, Nirvana also describes the gene fusion using HGVS c. 
notation:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{3}","{3}":!0},' "fusions": [\n {\n "hgvsc": "RUNX1{NM_001754.4}:c.1_58+274_ETV6{NM_001987.4}:c.1009+3367_1359",\n "intron": 2\n }\n')),(0,i.kt)("p",null,"This means that gene fusion uses CDS positions 1-58 from ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4")," (RUNX1) and CDS positions 1009-1359 from ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4")," (ETV6). ",(0,i.kt)("inlineCode",{parentName:"p"},"1009+3367")," indicates that the fusion occurred 3367 bp within intron 2."))}d.isMDXComponent=!0},88476:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/etv6-runx1-fusion-ec8f4312c9aca496bde0d6e2b1bbd50d.svg"},96721:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/gene-fusions-fig2-1cce8ac31b00465c8d36bdc47ec3309e.svg"}}]); \ No newline at end of file diff --git a/assets/js/601929e3.084ac62a.js b/assets/js/601929e3.084ac62a.js deleted file mode 100644 index dc656089..00000000 --- a/assets/js/601929e3.084ac62a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1266],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var c=r.createContext({}),s=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=s(e.components);return 
r.createElement(c.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},g=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,c=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),m=s(n),g=a,u=m["".concat(c,".").concat(g)]||m[g]||d[g]||o;return n?r.createElement(u,l(l({ref:t},p),{},{components:n})):r.createElement(u,l({ref:t},p))}));function u(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=g;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[m]="string"==typeof e?e:a,l[1]=i;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>i,toc:()=>c});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,i={unversionedId:"data-sources/fusioncatcher-json",id:"data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/fusioncatcher-json.md",tags:[],version:"current",frontMatter:{}},c=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],s={toc:c},p="wrapper";function m(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA 
oesophageal carcinomas"\n ]\n }\n ]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,a.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known germline data sources")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data sources")))),(0,a.kt)("h4",{id:"genes"},"genes"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"first"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"second"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"3' 
gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are paralogs for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)")))),(0,a.kt)("h4",{id:"gene"},"gene"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. 
MSH6")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/601929e3.dcfe1329.js b/assets/js/601929e3.dcfe1329.js new file mode 100644 index 00000000..6cdef9fb --- /dev/null +++ b/assets/js/601929e3.dcfe1329.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1266],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var r=n(7294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var c=r.createContext({}),s=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=s(e.components);return r.createElement(c.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},g=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,c=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),m=s(n),g=a,u=m["".concat(c,".").concat(g)]||m[g]||d[g]||o;return n?r.createElement(u,l(l({ref:t},p),{},{components:n})):r.createElement(u,l({ref:t},p))}));function u(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=g;var i={};for(var c in 
t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[m]="string"==typeof e?e:a,l[1]=i;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>i,toc:()=>c});var r=n(7462),a=(n(7294),n(3905));const o={},l=void 0,i={unversionedId:"data-sources/fusioncatcher-json",id:"data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/fusioncatcher-json.md",tags:[],version:"current",frontMatter:{}},c=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],s={toc:c},p="wrapper";function m(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA oesophageal carcinomas"\n ]\n }\n ]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,a.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' 
gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known germline data sources")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data sources")))),(0,a.kt)("h4",{id:"genes"},"genes"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"first"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"second"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are paralogs for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a 
readthrough event (both are on the same strand and there are no genes between them)")))),(0,a.kt)("h4",{id:"gene"},"gene"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/6120a7a1.31e8977c.js b/assets/js/6120a7a1.31e8977c.js deleted file mode 100644 index 17927d3a..00000000 --- a/assets/js/6120a7a1.31e8977c.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6531],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>k});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var 
t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},d="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),d=s(n),g=r,k=d["".concat(p,".").concat(g)]||d[g]||c[g]||i;return n?a.createElement(k,o(o({ref:t},m),{},{components:n})):a.createElement(k,o({ref:t},m))}));function k(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=g;var l={};for(var p in t)hasOwnProperty.call(t,p)&&(l[p]=t[p]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const i={title:"MNV Recomposition"},o=void 0,l={unversionedId:"core-functionality/mnv-recomposition",id:"version-3.16/core-functionality/mnv-recomposition",title:"MNV Recomposition",description:"Overview",source:"@site/versioned_docs/version-3.16/core-functionality/mnv-recomposition.md",sourceDirName:"core-functionality",slug:"/core-functionality/mnv-recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/mnv-recomposition",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/core-functionality/mnv-recomposition.md",tags:[],version:"3.16",frontMatter:{title:"MNV Recomposition"},sidebar:"version-3.16/docs",previous:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/gene-fusions"},next:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/variant-ids"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"Criteria",id:"criteria",children:[],level:2},{value:"Examples",id:"examples",children:[{value:"Multiple 
Samples",id:"multiple-samples",children:[],level:3},{value:"Phase Sets",id:"phase-sets",children:[{value:"Homozygous variants, same phase set",id:"homozygous-variants-same-phase-set",children:[],level:4},{value:"Mixing phased and unphased variants",id:"mixing-phased-and-unphased-variants",children:[],level:4},{value:"Variants in different phase sets",id:"variants-in-different-phase-sets",children:[],level:4},{value:"Unphased homozygous variants",id:"unphased-homozygous-variants",children:[],level:4},{value:"Homozygous variants are not commutative",id:"homozygous-variants-are-not-commutative",children:[],level:4}],level:3},{value:"Conflicting Genotypes",id:"conflicting-genotypes",children:[],level:3}],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],s={toc:p},m="wrapper";function d(e){let{components:t,...i}=e;return(0,r.kt)(m,(0,a.Z)({},s,i,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Most annotation tools handle variants independently. The problem with this approach is that nearby variants could affect the same codon leading to a very different annotation. For example, consider the following example (Danecek, 2017):"),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(45908).Z})),(0,r.kt)("p",null,"When handled independently, the two variants (C\u2192T & G\u2192A) would be annotated as missense annotations. However, if we consider them together, the resulting MNV would yield a stop gain."),(0,r.kt)("p",null,"By default, Nirvana identifies these types of cases where two or more SNVs would affect the same codon. 
In addition, it's able to perform this operation on VCFs containing large numbers of samples (we've tested this on 2,500+ samples using the 1000 Genomes Project VCF files)."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Petr Danecek, Shane A McCarthy, ",(0,r.kt)("a",{parentName:"p",href:"https://academic.oup.com/bioinformatics/article-abstract/33/13/2037/3000373"},"BCFtools/csq: haplotype-aware variant consequences"),", Bioinformatics, Volume 33, Issue 13, 1 July 2017, Pages 2037\u20132039"))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Supported variant types")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"At the moment, 
",(0,r.kt)("strong",{parentName:"p"},"Nirvana only supports recomposing multiple SNVs into an MNV"),". The Danecek paper makes a compelling case for supporting frameshifting variants paired with frame-restoring variants. We've also received requests for supporting the recomposition of an SNV with insertions and deletions. While this is something we've looked into, it represents functionality that many of our clinical customers are not yet comfortable with."))),(0,r.kt)("h2",{id:"criteria"},"Criteria"),(0,r.kt)("p",null,"Nirvana will recompose a set of SNVs if two or more SNVs are located in the same codon for any codon in any of the overlapping transcripts."),(0,r.kt)("p",null,"The following criteria must also be met for at least one sample:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"Genotypes are provided for the VCF variants and all variants are in phase or homozygous variant."),(0,r.kt)("li",{parentName:"ol"},"All the available phase set IDs are the same (homozygous variants are available to all phase sets)"),(0,r.kt)("li",{parentName:"ol"},"The genotype ploidy for all the variants are the same."),(0,r.kt)("li",{parentName:"ol"},"No unsupported variant type (i.e. insertion or deletion) overlaps the recomposed variants"),(0,r.kt)("li",{parentName:"ol"},"The first and last base in at least one of the recomposed alleles must be non-reference.")),(0,r.kt)("h2",{id:"examples"},"Examples"),(0,r.kt)("p",null,"During variant recomposition, if two SNVs affect the same codon, it becomes the seed codon. If there are SNVs in the adjacent codons, they will be aggregated into the seed codon."),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Three SNVs in two adjacent codons. The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"ATAG"),":\n",(0,r.kt)("img",{src:n(13860).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Three SNVs in two adjacent codons (larger distance). 
The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"ATATCC"),":\n",(0,r.kt)("img",{src:n(79178).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Nirvana can use ",(0,r.kt)("strong",{parentName:"p"},"multiple reading frames")," to aggregate the seed codon. In this example, the seed codon is highlighted in green. If we look at reading frame 1, we see that the T\u2192A variant occurs in the ",(0,r.kt)("inlineCode",{parentName:"p"},"ACT")," codon. The adjacent codon to the left also has a variant C\u2192T. As a result, there can be up to four bases between SNVs when aggregating the flanking codons. The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"TTCACATAGCACTCAC"),":\n",(0,r.kt)("img",{src:n(94235).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Nothing will be recomposed if there's no seed codon:\n",(0,r.kt)("img",{src:n(8858).Z})))),(0,r.kt)("h3",{id:"multiple-samples"},"Multiple Samples"),(0,r.kt)("p",null,"Recomposing variants while handling multiple samples can be complex. The recomposition criteria described above often leads to sample-specific recomposed variants. 
Here we show the recomposition of three variants with sample-specific criteria marked in bold:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 1"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 2"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 3"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"td"},"0/1")),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 
3"),(0,r.kt)("td",{parentName:"tr",align:"center"},"102"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"td"},".")),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG, CG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"ACT"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CCT, CCA"),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2")))),(0,r.kt)("p",null,"In the example above, the heterozygous genotype in sample 1 at position 101 would prevent the MNVs from being recomposed. 
Similarly, the unknown genotype for sample 2 at position 102 would produce a smaller MNV than the one expressed for sample 3."),(0,r.kt)("h3",{id:"phase-sets"},"Phase Sets"),(0,r.kt)("h4",{id:"homozygous-variants-same-phase-set"},"Homozygous variants, same phase set"),(0,r.kt)("p",null,"Recomposed phase set becomes ",(0,r.kt)("inlineCode",{parentName:"p"},".")," since homozygous variants belong to all phase sets."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"mixing-phased-and-unphased-variants"},"Mixing phased and unphased variants"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG,TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")))),(0,r.kt)("h4",{id:"variants-in-different-phase-sets"},"Variants in different phase sets"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG,TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"unphased-homozygous-variants"},"Unphased homozygous variants"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"homozygous-variants-are-not-commutative"},"Homozygous variants are not commutative"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 3"),(0,r.kt)("td",{parentName:"tr",align:"center"},"102"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")))),(0,r.kt)("p",null,"In 
this example, the homozygous variant at position 101 cannot bridge the gap between other two variants since there could be a switching error between phase sets 567 & 890. As a result, we have to create two overlapping MNVs:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG, TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GG, GT"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")))),(0,r.kt)("h3",{id:"conflicting-genotypes"},"Conflicting Genotypes"),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Given the following VCF entries:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3\nchr1 12861477 . T C . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477\nchr1 12861478 . G A . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477\n")),(0,r.kt)("p",null,"Each original variant would be annotated as usual. 
The difference is that both will now have a ",(0,r.kt)("inlineCode",{parentName:"p"},"isDecomposedVariant")," flag set to true in addition to an entry in the ",(0,r.kt)("inlineCode",{parentName:"p"},"linkedVids")," field that points to the new MNV:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json",metastring:"{31-34,70-73}","{31-34,70-73}":!0},'{\n "chromosome":"chr1",\n "position":12861477,\n "refAllele":"T",\n "altAlleles":[\n "C"\n ],\n "filters":[\n "PASS"\n ],\n "samples":[\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0|1",\n }\n ],\n "variants":[\n {\n "vid":"1-12861477-T-C",\n "chromosome":"chr1",\n "begin":12861477,\n "end":12861477,\n "refAllele":"T",\n "altAllele":"C",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "linkedVids":[\n "1-12861477-TG-CA"\n ],\n "hgvsg":"NC_000001.11:g.12861477T>C",\n "transcripts":[ ... ]\n }\n ]\n},\n{\n "chromosome":"chr1",\n "position":12861478,\n "refAllele":"G",\n "altAlleles":[\n "A"\n ],\n "filters":[\n "PASS"\n ],\n "samples":[\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0|1",\n }\n ],\n "variants":[\n {\n "vid":"1-12861478-G-A",\n "chromosome":"chr1",\n "begin":12861478,\n "end":12861478,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "linkedVids":[\n "1-12861477-TG-CA"\n ],\n "hgvsg":"NC_000001.11:g.12861478G>A",\n "transcripts":[ ... 
]\n }\n ]\n}\n')),(0,r.kt)("p",null,"The recomposed variant gets a separate entry where the ",(0,r.kt)("inlineCode",{parentName:"p"},"isRecomposedVariant")," flag is set to true and the ",(0,r.kt)("inlineCode",{parentName:"p"},"linkedVids")," field links to the constituent SNVs:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json",metastring:"{31-34}","{31-34}":!0},'{\n "chromosome":"chr1",\n "position":12861478,\n "refAllele":"G",\n "altAlleles":[\n "A"\n ],\n "filters":[\n "PASS"\n ],\n "samples":[\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0|1",\n }\n ],\n "variants":[\n {\n "vid":"1-12861478-G-A",\n "chromosome":"chr1",\n "begin":12861478,\n "end":12861478,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "linkedVids":[\n "1-12861477-TG-CA"\n ],\n "hgvsg":"NC_000001.11:g.12861478G>A",\n "transcripts":[ ... ]\n }\n ]\n}\n')),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Recomposed QUAL, FILTER, and GQ")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Although the example above does not demonstrate it, Nirvana tries to set the quality score, filter, and genotype quality (GQ) for the recomposed variant. The QUAL score is calculated to be the ",(0,r.kt)("strong",{parentName:"p"},"minimum")," QUAL score for all the constituent SNVs. 
The same method is used for the genotype quality (GQ) scores. For the ",(0,r.kt)("inlineCode",{parentName:"p"},"filters")," field, ",(0,r.kt)("inlineCode",{parentName:"p"},"PASS")," will be used if all constituent variants passed their filters, otherwise we set it to ",(0,r.kt)("inlineCode",{parentName:"p"},"FilteredVariantsRecomposed"),"."))))}d.isMDXComponent=!0},45908:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/BCFtools-csq-fig1a-a266b0be1c6d74f085fcacb2f433f750.png"},94235:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/multiple-reading-frames-19e896fe74a8781afdd1fa2539edff88.png"},8858:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/no-recomposition-b63eb855b0ed62b8ae331eafc538223d.png"},79178:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/three-SNVs-larger-separation-85b12d5bafd32ee312103a1b9b588720.png"},13860:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/three-SNVs-two-codons-bc45a465809b53d51dbfb32deaa6324a.png"}}]); \ No newline at end of file diff --git a/assets/js/6260f43c.444a4bf6.js b/assets/js/6260f43c.444a4bf6.js deleted file mode 100644 index bc41c1b1..00000000 --- a/assets/js/6260f43c.444a4bf6.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5522],{3905:(M,L,t)=>{t.d(L,{Zo:()=>o,kt:()=>C});var i=t(67294);function e(M,L,t){return L in M?Object.defineProperty(M,L,{value:t,enumerable:!0,configurable:!0,writable:!0}):M[L]=t,M}function j(M,L){var t=Object.keys(M);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(M);L&&(i=i.filter((function(L){return Object.getOwnPropertyDescriptor(M,L).enumerable}))),t.push.apply(t,i)}return t}function u(M){for(var L=1;L=0||(e[t]=M[t]);return e}(M,L);if(Object.getOwnPropertySymbols){var j=Object.getOwnPropertySymbols(M);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(M,t)&&(e[t]=M[t])}return e}var N=i.createContext({}),n=function(M){var L=i.useContext(N),t=L;return 
M&&(t="function"==typeof M?M(L):u(u({},L),M)),t},o=function(M){var L=n(M.components);return i.createElement(N.Provider,{value:L},M.children)},s="mdxType",w={inlineCode:"code",wrapper:function(M){var L=M.children;return i.createElement(i.Fragment,{},L)}},y=i.forwardRef((function(M,L){var t=M.components,e=M.mdxType,j=M.originalType,N=M.parentName,o=a(M,["components","mdxType","originalType","parentName"]),s=n(t),y=e,C=s["".concat(N,".").concat(y)]||s[y]||w[y]||j;return t?i.createElement(C,u(u({ref:L},o),{},{components:t})):i.createElement(C,u({ref:L},o))}));function C(M,L){var t=arguments,e=L&&L.mdxType;if("string"==typeof M||e){var j=t.length,u=new Array(j);u[0]=y;var a={};for(var N in L)hasOwnProperty.call(L,N)&&(a[N]=L[N]);a.originalType=M,a[s]="string"==typeof M?M:e,u[1]=a;for(var n=2;n{t.d(L,{Z:()=>e});var i=t(67294);function e(M){let{className:L,name:t,children:e,githubUrl:j,twitterUrl:u}=M;return i.createElement("div",{className:L},i.createElement("div",{className:"card card--full-height"},i.createElement("div",{className:"card__header"},i.createElement("div",{className:"avatar avatar--vertical"},i.createElement("img",{className:"avatar__photo avatar__photo--xl",src:j+".png"}),i.createElement("div",{className:"avatar__intro"},i.createElement("h3",{className:"avatar__name"},t)))),i.createElement("div",{className:"card__body"},e),i.createElement("div",{className:"card__footer"},i.createElement("div",{className:"button-group button-group--block"},j&&i.createElement("a",{className:"button button--secondary",href:j},"GitHub"),u&&i.createElement("a",{className:"button button--secondary",href:u},"Twitter")))))}},94125:(M,L,t)=>{t.r(L),t.d(L,{TeamProfileCardCol:()=>o,contentTitle:()=>a,default:()=>y,frontMatter:()=>u,metadata:()=>N,toc:()=>n});var i=t(87462),e=(t(67294),t(3905)),j=t(63427);const u={id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},a=void 
0,N={unversionedId:"introduction/introduction",id:"version-3.16/introduction/introduction",title:"Introduction",description:"Clinical-grade variant annotation",source:"@site/versioned_docs/version-3.16/introduction/introduction.mdx",sourceDirName:"introduction",slug:"/",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/introduction/introduction.mdx",tags:[],version:"3.16",frontMatter:{id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},sidebar:"version-3.16/docs",next:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/dependencies"}},n=[{value:"What does Nirvana annotate?",id:"what-does-nirvana-annotate",children:[],level:2},{value:"Licensing",id:"licensing",children:[{value:"Code",id:"code",children:[],level:3},{value:"Data",id:"data",children:[],level:3}],level:2},{value:"Nirvana Team",id:"nirvana-team",children:[{value:"Active Team",id:"active-team",children:[],level:3},{value:"Honorary Alumni",id:"honorary-alumni",children:[],level:3}],level:2}];function o(M){return(0,e.kt)(j.Z,(0,i.Z)({},M,{className:"col col--6 margin-bottom--lg",mdxType:"TeamProfileCard"}))}const s={toc:n,TeamProfileCardCol:o},w="wrapper";function y(M){let{components:L,...j}=M;return(0,e.kt)(w,(0,i.Z)({},s,j,{components:L,mdxType:"MDXLayout"}),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(59662).Z})),(0,e.kt)("p",null,"Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs (including CNVs). 
It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation."),(0,e.kt)("p",null,"The input to Nirvana are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Nirvana handles multiple alternate alleles and multiple samples with ease."),(0,e.kt)("p",null,"The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily."),(0,e.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,e.kt)("div",{parentName:"div",className:"admonition-heading"},(0,e.kt)("h5",{parentName:"div"},(0,e.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,e.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,e.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Fun Fact")),(0,e.kt)("div",{parentName:"div",className:"admonition-content"},(0,e.kt)("p",{parentName:"div"},"Nirvana is a backronym for ",(0,e.kt)("strong",{parentName:"p"},"NI"),"mble and ",(0,e.kt)("strong",{parentName:"p"},"R"),"obust ",(0,e.kt)("strong",{parentName:"p"},"VA"),"riant 
a",(0,e.kt)("strong",{parentName:"p"},"N"),"not",(0,e.kt)("strong",{parentName:"p"},"A"),"tor"))),(0,e.kt)("h2",{id:"what-does-nirvana-annotate"},"What does Nirvana annotate?"),(0,e.kt)("p",null,"We use Sequence Ontology consequences to describe how each variant impacts a given transcript:"),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(28522).Z})),(0,e.kt)("p",null,"In addition, we also use external data sources to provide additional context for each variant:"),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(14474).Z})),(0,e.kt)("h2",{id:"licensing"},"Licensing"),(0,e.kt)("h3",{id:"code"},"Code"),(0,e.kt)("p",null,"Nirvana source code is provided under the ",(0,e.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/blob/develop/LICENSE"},"GPLv3")," license. Nirvana includes several third party packages provided under other open source licenses, please see ",(0,e.kt)("a",{parentName:"p",href:"introduction/dependencies"},"Dependencies")," for additional details."),(0,e.kt)("h3",{id:"data"},"Data"),(0,e.kt)("p",null,"The data used by Nirvana is publicly available, however some data sources have special restrictions on use by non-academic entities."),(0,e.kt)("h2",{id:"nirvana-team"},"Nirvana Team"),(0,e.kt)("h3",{id:"active-team"},"Active Team"),(0,e.kt)("p",null,"The Nirvana team works on the core functionality, AWS annotation services, in addition to keeping the annotation data sources up-to-date."),(0,e.kt)("p",null,"Current members of the Nirvana team are listed in alphabetical order below."),(0,e.kt)("div",{className:"row"},(0,e.kt)(o,{name:"Joseph Platzer",githubUrl:"https://github.com/jplatzer2",mdxType:"TeamProfileCardCol"},"Test Lead. 
Joins Nirvana with a history of building sequencing tools and keeping the customer first."),(0,e.kt)(o,{name:"Michael Str\xf6mberg",githubUrl:"https://github.com/MichaelStromberg",mdxType:"TeamProfileCardCol"},"Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it."),(0,e.kt)(o,{name:"Rajat Shuvro Roy",githubUrl:"https://github.com/rajatshuvro",mdxType:"TeamProfileCardCol"},"Lead developer. Loves to speed up things and make services available to all interested users.")),(0,e.kt)("h3",{id:"honorary-alumni"},"Honorary Alumni"),(0,e.kt)("p",null,"Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things."),(0,e.kt)("div",{className:"row"},(0,e.kt)(o,{name:"Haochen Li",githubUrl:"https://github.com/haochenl",mdxType:"TeamProfileCardCol"},"Detail-oriented quick thinker that keeps cool even in the most stressful situations. Now working as a Senior Bioinformatics Data Scientist at GRAIL."),(0,e.kt)(o,{name:"Julien Lajugie",githubUrl:"https://github.com/JulienLajugie",mdxType:"TeamProfileCardCol"},"Julien is a legend around these parts. When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place."),(0,e.kt)(o,{name:"Shuli Kang",githubUrl:"https://github.com/shulik7",mdxType:"TeamProfileCardCol"},"Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies."),(0,e.kt)(o,{name:"Yu Jiang",githubUrl:"https://github.com/yujiang02",mdxType:"TeamProfileCardCol"},"Biostatistics genius from Duke University before joining our team at Illumina. 
Now working as a Research Engineer at Facebook AI Research.")))}y.isMDXComponent=!0},59662:(M,L,t)=>{t.d(L,{Z:()=>i});const i=""},14474:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/SupplementaryAnnotations-d43d3f1c837f9b80fab530432e0e4b1d.svg"},28522:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/TranscriptConsequences-60ca1c43a36dacf896fecdabf09ce02c.svg"}}]); \ No newline at end of file diff --git a/assets/js/629fa08a.5d4ae330.js b/assets/js/629fa08a.5d4ae330.js deleted file mode 100644 index 2df17fe0..00000000 --- a/assets/js/629fa08a.5d4ae330.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1865],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>k});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var o=a.createContext({}),s=function(t){var e=a.useContext(o),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=s(t.components);return a.createElement(o.Provider,{value:e},t.children)},d="mdxType",c={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,o=t.parentName,m=p(t,["components","mdxType","originalType","parentName"]),d=s(n),u=r,k=d["".concat(o,".").concat(u)]||d[u]||c[u]||l;return n?a.createElement(k,i(i({ref:e},m),{},{components:n})):a.createElement(k,i({ref:e},m))}));function k(t,e){var 
n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,i=new Array(l);i[0]=u;var p={};for(var o in e)hasOwnProperty.call(e,o)&&(p[o]=e[o]);p.originalType=t,p[d]="string"==typeof t?t:r,i[1]=p;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>l,metadata:()=>p,toc:()=>o});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,p={unversionedId:"data-sources/clinvar-json",id:"version-3.18/data-sources/clinvar-json",title:"clinvar-json",description:"small variants:",source:"@site/versioned_docs/version-3.18/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clinvar-json.md",tags:[],version:"3.18",frontMatter:{}},o=[],s={toc:o},m="wrapper";function d(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"small variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"large variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n 
"chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n "pathogenic"\n ], \n "lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n "significance":[\n "pathogenic"\n ],\n "lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely 
pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/62cf9a59.c18bff48.js b/assets/js/62cf9a59.c18bff48.js deleted file mode 100644 index 5279e33d..00000000 --- a/assets/js/62cf9a59.c18bff48.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[207],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>k});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},d="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),d=s(n),g=r,k=d["".concat(p,".").concat(g)]||d[g]||c[g]||i;return n?a.createElement(k,o(o({ref:t},m),{},{components:n})):a.createElement(k,o({ref:t},m))}));function k(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=g;var l={};for(var p in t)hasOwnProperty.call(t,p)&&(l[p]=t[p]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const i={title:"MNV Recomposition"},o=void 0,l={unversionedId:"core-functionality/mnv-recomposition",id:"version-3.21/core-functionality/mnv-recomposition",title:"MNV Recomposition",description:"Overview",source:"@site/versioned_docs/version-3.21/core-functionality/mnv-recomposition.md",sourceDirName:"core-functionality",slug:"/core-functionality/mnv-recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/mnv-recomposition",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/core-functionality/mnv-recomposition.md",tags:[],version:"3.21",frontMatter:{title:"MNV Recomposition"},sidebar:"docs",previous:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/gene-fusions"},next:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/variant-ids"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"Criteria",id:"criteria",children:[],level:2},{value:"Examples",id:"examples",children:[{value:"Multiple Samples",id:"multiple-samples",children:[],level:3},{value:"Phase Sets",id:"phase-sets",children:[{value:"Homozygous variants, same phase set",id:"homozygous-variants-same-phase-set",children:[],level:4},{value:"Mixing phased and 
unphased variants",id:"mixing-phased-and-unphased-variants",children:[],level:4},{value:"Variants in different phase sets",id:"variants-in-different-phase-sets",children:[],level:4},{value:"Unphased homozygous variants",id:"unphased-homozygous-variants",children:[],level:4},{value:"Homozygous variants are not commutative",id:"homozygous-variants-are-not-commutative",children:[],level:4}],level:3},{value:"Conflicting Genotypes",id:"conflicting-genotypes",children:[],level:3}],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],s={toc:p},m="wrapper";function d(e){let{components:t,...i}=e;return(0,r.kt)(m,(0,a.Z)({},s,i,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Most annotation tools handle variants independently. The problem with this approach is that nearby variants could affect the same codon leading to a very different annotation. For example, consider the following example (Danecek, 2017):"),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(99579).Z})),(0,r.kt)("p",null,"When handled independently, the two variants (C\u2192T & G\u2192A) would be annotated as missense annotations. However, if we consider them together, the resulting MNV would yield a stop gain."),(0,r.kt)("p",null,"By default, Nirvana identifies these types of cases where two or more SNVs would affect the same codon. 
In addition, it's able to perform this operation on VCFs containing large numbers of samples (we've tested this on 2,500+ samples using the 1000 Genomes Project VCF files)."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Petr Danecek, Shane A McCarthy, ",(0,r.kt)("a",{parentName:"p",href:"https://academic.oup.com/bioinformatics/article-abstract/33/13/2037/3000373"},"BCFtools/csq: haplotype-aware variant consequences"),", Bioinformatics, Volume 33, Issue 13, 1 July 2017, Pages 2037\u20132039"))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Supported variant types")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"At the moment, 
",(0,r.kt)("strong",{parentName:"p"},"Nirvana only supports recomposing multiple SNVs into an MNV"),". The Danecek paper makes a compelling case for supporting frameshifting variants paired with frame-restoring variants. We've also received requests for supporting the recomposition of an SNV with insertions and deletions. While this is something we've looked into, it represents functionality that many of our clinical customers are not yet comfortable with."))),(0,r.kt)("h2",{id:"criteria"},"Criteria"),(0,r.kt)("p",null,"Nirvana will recompose a set of SNVs if two or more SNVs are located in the same codon for any codon in any of the overlapping transcripts."),(0,r.kt)("p",null,"The following criteria must also be met for at least one sample:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"Genotypes are provided for the VCF variants and all variants are in phase or homozygous variant."),(0,r.kt)("li",{parentName:"ol"},"All the available phase set IDs are the same (homozygous variants are available to all phase sets)"),(0,r.kt)("li",{parentName:"ol"},"The genotype ploidy for all the variants are the same."),(0,r.kt)("li",{parentName:"ol"},"No unsupported variant type (i.e. insertion or deletion) overlaps the recomposed variants"),(0,r.kt)("li",{parentName:"ol"},"The first and last base in at least one of the recomposed alleles must be non-reference.")),(0,r.kt)("h2",{id:"examples"},"Examples"),(0,r.kt)("p",null,"During variant recomposition, if two SNVs affect the same codon, it becomes the seed codon. If there are SNVs in the adjacent codons, they will be aggregated into the seed codon."),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Three SNVs in two adjacent codons. The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"ATAG"),":\n",(0,r.kt)("img",{src:n(21669).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Three SNVs in two adjacent codons (larger distance). 
The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"ATATCC"),":\n",(0,r.kt)("img",{src:n(26256).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Nirvana can use ",(0,r.kt)("strong",{parentName:"p"},"multiple reading frames")," to aggregate the seed codon. In this example, the seed codon is highlighted in green. If we look at reading frame 1, we see that the T\u2192A variant occurs in the ",(0,r.kt)("inlineCode",{parentName:"p"},"ACT")," codon. The adjacent codon to the left also has a variant C\u2192T. As a result, there can be up to four bases between SNVs when aggregating the flanking codons. The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"TTCACATAGCACTCAC"),":\n",(0,r.kt)("img",{src:n(72272).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Nothing will be recomposed if there's no seed codon:\n",(0,r.kt)("img",{src:n(20564).Z})))),(0,r.kt)("h3",{id:"multiple-samples"},"Multiple Samples"),(0,r.kt)("p",null,"Recomposing variants while handling multiple samples can be complex. The recomposition criteria described above often leads to sample-specific recomposed variants. 
Here we show the recomposition of three variants with sample-specific criteria marked in bold:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 1"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 2"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 3"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"td"},"0/1")),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 
3"),(0,r.kt)("td",{parentName:"tr",align:"center"},"102"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"td"},".")),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG, CG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"ACT"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CCT, CCA"),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2")))),(0,r.kt)("p",null,"In the example above, the heterozygous genotype in sample 1 at position 101 would prevent the MNVs from being recomposed. 
Similarly, the unknown genotype for sample 2 at position 102 would produce a smaller MNV than the one expressed for sample 3."),(0,r.kt)("h3",{id:"phase-sets"},"Phase Sets"),(0,r.kt)("h4",{id:"homozygous-variants-same-phase-set"},"Homozygous variants, same phase set"),(0,r.kt)("p",null,"Recomposed phase set becomes ",(0,r.kt)("inlineCode",{parentName:"p"},".")," since homozygous variants belong to all phase sets."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"mixing-phased-and-unphased-variants"},"Mixing phased and unphased variants"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG,TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")))),(0,r.kt)("h4",{id:"variants-in-different-phase-sets"},"Variants in different phase sets"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG,TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"unphased-homozygous-variants"},"Unphased homozygous variants"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"homozygous-variants-are-not-commutative"},"Homozygous variants are not commutative"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 3"),(0,r.kt)("td",{parentName:"tr",align:"center"},"102"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")))),(0,r.kt)("p",null,"In 
this example, the homozygous variant at position 101 cannot bridge the gap between other two variants since there could be a switching error between phase sets 567 & 890. As a result, we have to create two overlapping MNVs:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG, TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GG, GT"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")))),(0,r.kt)("h3",{id:"conflicting-genotypes"},"Conflicting Genotypes"),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Given the following VCF entries:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3\nchr1 12861477 . T C . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477\nchr1 12861478 . G A . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477\n")),(0,r.kt)("p",null,"Each original variant would be annotated as usual. 
The difference is that both will now have a ",(0,r.kt)("inlineCode",{parentName:"p"},"isDecomposedVariant")," flag set to true in addition to an entry in the ",(0,r.kt)("inlineCode",{parentName:"p"},"linkedVids")," field that points to the new MNV:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json",metastring:"{31-34,70-73}","{31-34,70-73}":!0},'{\n "chromosome":"chr1",\n "position":12861477,\n "refAllele":"T",\n "altAlleles":[\n "C"\n ],\n "filters":[\n "PASS"\n ],\n "samples":[\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0|1",\n }\n ],\n "variants":[\n {\n "vid":"1-12861477-T-C",\n "chromosome":"chr1",\n "begin":12861477,\n "end":12861477,\n "refAllele":"T",\n "altAllele":"C",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "linkedVids":[\n "1-12861477-TG-CA"\n ],\n "hgvsg":"NC_000001.11:g.12861477T>C",\n "transcripts":[ ... ]\n }\n ]\n},\n{\n "chromosome":"chr1",\n "position":12861478,\n "refAllele":"G",\n "altAlleles":[\n "A"\n ],\n "filters":[\n "PASS"\n ],\n "samples":[\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0|1",\n }\n ],\n "variants":[\n {\n "vid":"1-12861478-G-A",\n "chromosome":"chr1",\n "begin":12861478,\n "end":12861478,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "linkedVids":[\n "1-12861477-TG-CA"\n ],\n "hgvsg":"NC_000001.11:g.12861478G>A",\n "transcripts":[ ... 
]\n }\n ]\n}\n')),(0,r.kt)("p",null,"The recomposed variant gets a separate entry where the ",(0,r.kt)("inlineCode",{parentName:"p"},"isRecomposedVariant")," flag is set to true and the ",(0,r.kt)("inlineCode",{parentName:"p"},"linkedVids")," field links to the constituent SNVs:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json",metastring:"{32-36}","{32-36}":!0},' {\n "chromosome": "chr1",\n "position": 12861477,\n "refAllele": "TG",\n "altAlleles": [\n "CA"\n ],\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "1p36.21",\n "samples": [\n {\n "genotype": "0|0"\n },\n {\n "genotype": "0|0"\n },\n {\n "genotype": "0|1"\n }\n ],\n "variants": [\n {\n "vid": "1-12861477-TG-CA",\n "chromosome": "chr1",\n "begin": 12861477,\n "end": 12861478,\n "refAllele": "TG",\n "altAllele": "CA",\n "variantType": "MNV",\n "isRecomposedVariant": true,\n "linkedVids": [\n "1-12861477-T-C",\n "1-12861478-G-A"\n ],\n "hgvsg": "NC_000001.11:g.12861477_12861478inv",\n "transcripts":[ ... ]\n ]\n }\n ]\n },\n')),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Recomposed QUAL, FILTER, and GQ")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Although the example above does not demonstrate it, Nirvana tries to set the quality score, filter, and genotype quality (GQ) for the recomposed variant. 
The QUAL score is calculated to be the ",(0,r.kt)("strong",{parentName:"p"},"minimum")," QUAL score for all the constituent SNVs. The same method is used for the genotype quality (GQ) scores. For the ",(0,r.kt)("inlineCode",{parentName:"p"},"filters")," field, ",(0,r.kt)("inlineCode",{parentName:"p"},"PASS")," will be used if all constituent variants passed their filters, otherwise we set it to ",(0,r.kt)("inlineCode",{parentName:"p"},"FilteredVariantsRecomposed"),"."))))}d.isMDXComponent=!0},99579:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/BCFtools-csq-fig1a-a266b0be1c6d74f085fcacb2f433f750.png"},72272:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/multiple-reading-frames-19e896fe74a8781afdd1fa2539edff88.png"},20564:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/no-recomposition-b63eb855b0ed62b8ae331eafc538223d.png"},26256:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/three-SNVs-larger-separation-85b12d5bafd32ee312103a1b9b588720.png"},21669:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/three-SNVs-two-codons-bc45a465809b53d51dbfb32deaa6324a.png"}}]); \ No newline at end of file diff --git a/assets/js/63537de1.17793717.js b/assets/js/63537de1.17793717.js deleted file mode 100644 index 2ee2bc4d..00000000 --- a/assets/js/63537de1.17793717.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5091,7268,140,525,9373,4408,833,8762,5862,986,7199,9482,1100,5200,3396,2616,2146,6819,3057,4091,1779,7366,6403,448],{3905:(t,e,a)=>{a.d(e,{Zo:()=>m,kt:()=>g});var n=a(67294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return 
r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),d=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},m=function(t){var e=d(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},c=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=o(t,["components","mdxType","originalType","parentName"]),s=d(a),c=r,g=s["".concat(p,".").concat(c)]||s[c]||u[c]||l;return a?n.createElement(g,i(i({ref:e},m),{},{components:a})):n.createElement(g,i({ref:e},m))}));function g(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=c;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[s]="string"==typeof t?t:r,i[1]=o;for(var d=2;d{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.21/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}s.isMDXComponent=!0},12146:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.21/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0},60617:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.21/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,r.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}s.isMDXComponent=!0},33826:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.21/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clingen-dosage-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is 
associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,r.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}s.isMDXComponent=!0},73791:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 
0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.21/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID 
(MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted"),(0,r.kt)("li",{parentName:"ul"},"no known disease relationship")))}s.isMDXComponent=!0},76541:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-json",id:"version-3.21/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clingen-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). 
Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}s.isMDXComponent=!0},95697:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clinvar-json",id:"version-3.21/data-sources/clinvar-json",title:"clinvar-json",description:"small 
variants:",source:"@site/versioned_docs/version-3.21/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clinvar-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"small variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"large variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n "pathogenic"\n ], \n "lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n 
"significance":[\n "pathogenic"\n ],\n "lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely 
pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}s.isMDXComponent=!0},5033:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/cosmic-cancer-gene-census",id:"version-3.21/data-sources/cosmic-cancer-gene-census",title:"cosmic-cancer-gene-census",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/cosmic-cancer-gene-census.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-cancer-gene-census",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cosmic-cancer-gene-census",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/cosmic-cancer-gene-census.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},' {\n "name": "PRDM16",\n "hgncId": 14000,\n "ncbiGeneId": "63976",\n "ensemblGeneId": "ENSG00000142611",\n "cosmic": {\n "roleInCancer": [\n "oncogene",\n "fusion"\n ]\n 
}\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"roleInCancer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Possible roles in caner")))))}s.isMDXComponent=!0},59907:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dann-json",id:"version-3.21/data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/dann-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 0.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 
1.0")))))}s.isMDXComponent=!0},76707:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.21/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/dbsnp-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}s.isMDXComponent=!0},25803:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/decipher-json",id:"version-3.21/data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/decipher-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n }\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed deletions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"total # of samples")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% annotation overlap")))))}s.isMDXComponent=!0},63365:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gerp-json",id:"version-3.21/data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gerp-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 1.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to +\u221e")))))}s.isMDXComponent=!0},42301:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gme-json",id:"version-3.21/data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gme-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}s.isMDXComponent=!0},36458:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var 
n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.21/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad-lof-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pLi"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pNull"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = 
expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pRec"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"synZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"misZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}s.isMDXComponent=!0},19804:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.21/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coverage"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. 
Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}s.isMDXComponent=!0},292:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-json",id:"version-3.21/data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad-structural-variants-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": [\n {\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n "variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n "afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n 
}\n]\n\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"chromosome number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"structural variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"gnomAD ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,r.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,r.kt)("tr",{parentNa
me:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}s.isMDXComponent=!0},16006:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.21/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}s.isMDXComponent=!0},96392:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.21/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n 
"annotationOverlap":0.42405\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}s.isMDXComponent=!0},95878:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/omim-json",id:"version-3.21/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/omim-json.md",tags:[],version:"3.21",frontMatter:{}},p=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}s.isMDXComponent=!0},3775:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/primate-ai-json",id:"version-3.21/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/primate-ai-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}s.isMDXComponent=!0},27061:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/revel-json",id:"version-3.21/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/revel-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n 
"score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}s.isMDXComponent=!0},54887:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/splice-ai-json",id:"version-3.21/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/splice-ai-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}s.isMDXComponent=!0},60216:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/topmed-json",id:"version-3.21/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/topmed-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}s.isMDXComponent=!0},52557:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>C,default:()=>x,frontMatter:()=>M,metadata:()=>S,toc:()=>R});var n=a(87462),r=(a(67294),a(3905)),l=a(60617),i=a(95697),o=a(76541),p=a(33826),d=a(73791),m=a(76707),s=a(3775),u=a(27061),c=a(59907),g=a(63365),k=a(54887),N=a(16006),f=a(96392),y=a(19804),h=a(36458),b=a(3952),v=a(12146),A=a(95878),j=a(60216),D=a(292),w=a(42301),I=a(25803),T=a(5033);const M={title:"Nirvana JSON File Format"},C=void 
0,S={unversionedId:"file-formats/nirvana-json-file-format",id:"version-3.21/file-formats/nirvana-json-file-format",title:"Nirvana JSON File Format",description:"Overview",source:"@site/versioned_docs/version-3.21/file-formats/nirvana-json-file-format.mdx",sourceDirName:"file-formats",slug:"/file-formats/nirvana-json-file-format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/file-formats/nirvana-json-file-format",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/file-formats/nirvana-json-file-format.mdx",tags:[],version:"3.21",frontMatter:{title:"Nirvana JSON File Format"},sidebar:"docs",previous:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/topmed"},next:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/file-formats/custom-annotations"}},R=[{value:"Overview",id:"overview",children:[{value:"Conventions",id:"conventions",children:[],level:3},{value:"JSON Layout",id:"json-layout",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Header",id:"header",children:[{value:"Data Source",id:"data-source",children:[],level:4},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:4}],level:2},{value:"Positions",id:"positions",children:[{value:"ClinGen",id:"clingen",children:[],level:3},{value:"1000 Genomes (SV)",id:"1000-genomes-sv",children:[],level:3},{value:"gnomAD (SV)",id:"gnomad-sv",children:[],level:3},{value:"MITOMAP (SV)",id:"mitomap-sv",children:[],level:3}],level:2},{value:"Samples",id:"samples",children:[],level:2},{value:"Variants",id:"variants",children:[{value:"Transcripts",id:"transcripts",children:[{value:"PolyPhen",id:"polyphen",children:[],level:4},{value:"SIFT",id:"sift",children:[],level:4},{value:"Amino Acid Conservation",id:"amino-acid-conservation",children:[],level:4},{value:"Gene 
Fusions",id:"gene-fusions",children:[],level:4},{value:"Fusion",id:"fusion",children:[],level:4},{value:"Cancer Hotspots",id:"cancer-hotspots",children:[],level:4}],level:3},{value:"Regulatory Regions",id:"regulatory-regions",children:[{value:"Regulatory Types",id:"regulatory-types",children:[],level:4},{value:"Regulatory Consequences",id:"regulatory-consequences",children:[],level:4}],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3},{value:"1000 Genomes",id:"1000-genomes",children:[],level:3},{value:"DANN",id:"dann",children:[],level:3},{value:"dbSNP",id:"dbsnp",children:[],level:3},{value:"DECIPHER",id:"decipher",children:[],level:3},{value:"GERP",id:"gerp",children:[],level:3},{value:"GME Variome",id:"gme-variome",children:[],level:3},{value:"gnomAD",id:"gnomad",children:[],level:3},{value:"MITOMAP",id:"mitomap",children:[],level:3},{value:"Primate AI",id:"primate-ai",children:[],level:3},{value:"REVEL",id:"revel",children:[],level:3},{value:"Splice AI",id:"splice-ai",children:[],level:3},{value:"TOPMed",id:"topmed",children:[],level:3}],level:2},{value:"Genes",id:"genes",children:[{value:"OMIM",id:"omim",children:[],level:3},{value:"gnomAD LoF Gene Metrics",id:"gnomad-lof-gene-metrics",children:[],level:3},{value:"ClinGen Disease Validity",id:"clingen-disease-validity",children:[],level:3},{value:"COSMIC Cancer Gene Census",id:"cosmic-cancer-gene-census",children:[],level:3}],level:2}],O={toc:R},F="wrapper";function x(t){let{components:e,...M}=t;return(0,r.kt)(F,(0,n.Z)({},O,M,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("h3",{id:"conventions"},"Conventions"),(0,r.kt)("p",null,"In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. As such, we have several conventions that are useful to know about:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"With boolean key/value pairs, we only output the keys that have a true value. I.e. 
there's no reason to display ",(0,r.kt)("inlineCode",{parentName:"li"},'"isStructuralVariant":false')," a few million times when annotating a small variant VCF."),(0,r.kt)("li",{parentName:"ul"},"When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. Nirvana treats periods like empty or null strings and therefore will not output those entries.")),(0,r.kt)("h3",{id:"json-layout"},"JSON Layout"),(0,r.kt)("p",null,(0,r.kt)("img",{src:a(95421).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"In general, each position corresponds to a row in the original VCF file."),(0,r.kt)("p",{parentName:"div"},"For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section."))),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 
5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"We've put together a ",(0,r.kt)("a",{parentName:"p",href:"../introduction/parsing-json"},"new section that discusses how to parse our JSON files")," easily using examples in a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-python.ipynb"},"Python Jupyter notebook")," and a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-r.ipynb"},"R version")," as well. In addition, we have information about how to quickly dump content from our JSON file using a tabix-like utility called JASIX."))),(0,r.kt)("h2",{id:"header"},"Header"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"Nirvana 3.0.0-alpha.5+g6c52e247",\n "creationTime":"2017-06-14 15:53:13",\n "genomeAssembly":"GRCh37",\n "dataSources":[\n {\n "name":"OMIM",\n "version":"unknown",\n "description":"An Online Catalog of Human Genes and Genetic Disorders",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"VEP",\n "version":"84",\n "description":"BothRefSeqAndEnsembl",\n "releaseDate":"2017-01-16"\n },\n {\n "name":"ClinVar",\n "version":"20170503",\n "description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"phyloP",\n "version":"hg19",\n "description":"46 way conservation score between humans and 45 other vertebrates",\n "releaseDate":"2009-11-10"\n }\n ],\n "samples":[\n "NA12878",\n "NA12891",\n "NA12892"\n ]\n 
},\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotator"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the name of the annotator and the current version")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"creationTime"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd hh:mm:ss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genomeAssembly"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#genome-assemblies"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"schemaVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"incremented whenever the core structure of the JSON file introduces breaking changes")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#data-source"},"Data Source entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"samples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the order of these sample names will be used throughout the JSON file when enumerating samples")))),(0,r.kt)("h4",{id:"data-source"},"Data Source"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"version"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"optional description of the data source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"releaseDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")))),(0,r.kt)("h4",{id:"genome-assemblies"},"Genome Assemblies"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"GRCh37"),(0,r.kt)("li",{parentName:"ul"},"GRCh38"),(0,r.kt)("li",{parentName:"ul"},"hg19"),(0,r.kt)("li",{parentName:"ul"},"SARSCoV2")),(0,r.kt)("h2",{id:"positions"},"Positions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"positions":[\n {\n "chromosome":"chr2",\n "position":48010488,\n "repeatUnit":"GGCCCC",\n "refRepeatCount":3,\n "svEnd":48020488,\n "refAllele":"G",\n "altAlleles":[\n "A",\n "GT"\n ],\n "quality":461,\n "filters":[\n "PASS"\n ],\n "ciPos":[\n -170,\n 170\n ],\n "ciEnd":[\n -175,\n 
175\n ],\n "svLength":1000,\n "strandBias":1.23,\n "jointSomaticNormalQuality":29,\n "cytogeneticBand":"2p16.3",\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Variant Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"position"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (1-based notation). 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnit"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refRepeatCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"quality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (Normally an integer, but some variant callers using floating point. 
Has been observed as high as 500k)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"filters"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svLength"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"strandBias"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"small variant"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by GATK (from SB)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"jointSomaticNormalQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by the Manta variant caller (SOMATICSCORE)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cytogeneticBand"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"e.g. 
17p13.1")))),(0,r.kt)("h3",{id:"clingen"},"ClinGen"),(0,r.kt)(o.default,{mdxType:"ClinGen"}),(0,r.kt)(p.default,{mdxType:"ClinGenDosage"}),(0,r.kt)("h3",{id:"1000-genomes-sv"},"1000 Genomes (SV)"),(0,r.kt)(v.default,{mdxType:"ThousandGenomesSV"}),(0,r.kt)("h3",{id:"gnomad-sv"},"gnomAD (SV)"),(0,r.kt)(D.default,{mdxType:"GnomadSV"}),(0,r.kt)("h3",{id:"mitomap-sv"},"MITOMAP (SV)"),(0,r.kt)(f.default,{mdxType:"MitoMapSV"}),(0,r.kt)("h2",{id:"samples"},"Samples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n "totalDepth":57,\n "genotypeQuality":12,\n "copyNumber":3,\n "repeatUnitCounts":[\n 10,\n 20\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "failedFilter":true,\n "splitReadCounts":[\n 10,\n 20\n ],\n "pairedEndReadCounts":[\n 10,\n 20\n ],\n "isDeNovo":true,\n "diseaseAffectedStatuses":[\n "-"\n ],\n "artifactAdjustedQualityScore":89.3,\n "likelihoodRatioQualityScore":78.2,\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"VCF"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotype"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GT"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantFrequencies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"VF, AD"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"totalDepth"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DP"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotypeQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values. Typically maxes out at 99")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"copyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"minorHaplotypeCopyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"MCN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnitCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"REPCN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleDepths"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AD"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer 
values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"FT"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"splitReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pairedEndReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"PR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDeNovo"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DN"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"deNovoQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DQ"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseaseAffectedStatuses"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DST"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"artifactAdjustedQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. 
Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"likelihoodRatioQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"LQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lossOfHeterozygosity"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CN, MCN"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"somaticQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SQ"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"VF"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 100. 2 decimal places. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"binCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"BC"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Empty Samples")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"If a sample does not contain any entries, we will create a sample object that contains the ",(0,r.kt)("inlineCode",{parentName:"p"},"isEmpty")," key. 
This ensures that sample ordering is preserved while indicating that a sample is intentionally empty."),(0,r.kt)("pre",{parentName:"div"},(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "isEmpty":true\n }\n],\n')))),(0,r.kt)("h2",{id:"variants"},"Variants"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "isReferenceMinorAllele":true,\n "isStructuralVariant":true,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "isRecomposedVariant":true,\n "linkedVids":["2:48010488:GTA:ATC"],\n "hgvsg":"NC_000002.11:g.48010488G>A",\n "phylopScore":0.459\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"vid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"Variant Identifiers"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReferenceMinorAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a reference minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isStructuralVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a structural variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inLowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant lies in a low complexity region (gnomAD low complexity regions)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the reference allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the alternate allele.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"uses\xa0",(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"Sequence Ontology sequence 
alterations"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the decomposed variant has been used to create another recomposed variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isRecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is recomposed from two or more decomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"linkedVids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"list of ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"VIDs")," for variants connecting decomposed and recomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsg"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS g. notation")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phyloP conservation score. 
Range: -14.08 to 6.424")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Reference Minor Alleles")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Nirvana supports annotating reference minor alleles. In such a case, ",(0,r.kt)("inlineCode",{parentName:"p"},"refAllele")," will be replaced by the global major allele and ",(0,r.kt)("inlineCode",{parentName:"p"},"altAllele")," will be replaced with the original reference allele."))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Flagging Decomposed & Recomposed Variants")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isDecomposedVariant":true'),"."),(0,r.kt)("p",{parentName:"div"},"Similarly, the 
recomposed variant will be shown as a new VCF position. This recomposed variant will be flagged with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isRecomposedVariant":true'),"."))),(0,r.kt)("h3",{id:"transcripts"},"Transcripts"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"transcripts":[\n {\n "transcript":"ENST00000445503.1",\n "source":"Ensembl",\n "bioType":"nonsense_mediated_decay",\n "codons":"gGg/gAg",\n "aminoAcids":"G/E",\n "cdnaPos":"268",\n "cdsPos":"116",\n "exons":"1/9",\n "introns":"1/8",\n "proteinPos":"39",\n "geneId":"ENSG00000116062",\n "hgnc":"MSH6",\n "consequence":[\n "missense_variant",\n "NMD_transcript_variant"\n ],\n "hgvsc":"ENST00000445503.1:c.116G>A",\n "hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",\n "geneFusion":{\n "exon":6,\n "intron":5,\n "fusions":[\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",\n "exon":3,\n "intron":2\n },\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",\n "exon":2,\n "intron":1\n }\n ]\n },\n "isCanonical":true,\n "polyPhenScore":0.95,\n "polyPhenPrediction":"probably damaging",\n "proteinId":"ENSP00000405294.1",\n "siftScore":0.61,\n "siftPrediction":"tolerated",\n "completeOverlap":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript ID. e.g. 
ENST00000445503.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"source"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"RefSeq / Ensembl")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,r.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"codons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdnaPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdsPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exons affected by the variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"introns"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"introns affected by the 
variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/obob.cgi"},"Sequence Ontology Consequences"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS protein nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneFusion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#gene-fusions"},"Gene Fusions entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isCanonical"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a canonical 
transcript")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#polyphen"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"protein ID. E.g. ENSP00000405294.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#sift"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"completeOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this transcript is completely overlapped by the variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cancerHotspots"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#cancer-hotspots"},"Cancer Hotspots entry below"))))),(0,r.kt)("h4",{id:"polyphen"},"PolyPhen"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"probably damaging"),(0,r.kt)("li",{parentName:"ul"},"possibly 
damaging"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"unknown")),(0,r.kt)("h4",{id:"sift"},"SIFT"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"tolerated"),(0,r.kt)("li",{parentName:"ul"},"deleterious"),(0,r.kt)("li",{parentName:"ul"},"tolerated - low confidence"),(0,r.kt)("li",{parentName:"ul"},"deleterious - low confidence")),(0,r.kt)("h4",{id:"amino-acid-conservation"},"Amino Acid Conservation"),(0,r.kt)(l.default,{mdxType:"AminoAcidConservation"}),(0,r.kt)("h4",{id:"gene-fusions"},"Gene Fusions"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"fusions"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#fusion"},"Fusion entry 
below"))))),(0,r.kt)("h4",{id:"fusion"},"Fusion"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the other breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the other breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature describing the two genes and the transcripts that are fused along with")))),(0,r.kt)("h4",{id:"cancer-hotspots"},"Cancer Hotspots"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"residue"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant at the same amino acid 
position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numAltAminoAcidSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant with the same position and alternate amino acid position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"qValue"),(0,r.kt)("td",{parentName:"tr",align:"center"},"double"),(0,r.kt)("td",{parentName:"tr",align:"left"})))),(0,r.kt)("h3",{id:"regulatory-regions"},"Regulatory Regions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"regulatoryRegions":[\n {\n "id":"ENSR00001542175",\n "type":"promoter",\n "consequence":[\n "regulatory_region_variant"\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"type"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-types"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-consequences"},"possible values below"))))),(0,r.kt)("h4",{id:"regulatory-types"},"Regulatory 
Types"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CTCF_binding_site"),(0,r.kt)("li",{parentName:"ul"},"enhancer"),(0,r.kt)("li",{parentName:"ul"},"open_chromatin_region"),(0,r.kt)("li",{parentName:"ul"},"promoter"),(0,r.kt)("li",{parentName:"ul"},"promoter_flanking_region"),(0,r.kt)("li",{parentName:"ul"},"TF_binding_site")),(0,r.kt)("h4",{id:"regulatory-consequences"},"Regulatory Consequences"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"regulatory_region_variant"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_ablation"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_amplification"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_truncation")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)(i.default,{mdxType:"ClinVar"}),(0,r.kt)("h3",{id:"1000-genomes"},"1000 Genomes"),(0,r.kt)(b.default,{mdxType:"ThousandGenomesSmall"}),(0,r.kt)("h3",{id:"dann"},"DANN"),(0,r.kt)(c.default,{mdxType:"DANN"}),(0,r.kt)("h3",{id:"dbsnp"},"dbSNP"),(0,r.kt)(m.default,{mdxType:"DbSNP"}),(0,r.kt)("h3",{id:"decipher"},"DECIPHER"),(0,r.kt)(I.default,{mdxType:"DECIPHER"}),(0,r.kt)("h3",{id:"gerp"},"GERP"),(0,r.kt)(g.default,{mdxType:"GERP"}),(0,r.kt)("h3",{id:"gme-variome"},"GME Variome"),(0,r.kt)(w.default,{mdxType:"GME"}),(0,r.kt)("h3",{id:"gnomad"},"gnomAD"),(0,r.kt)(y.default,{mdxType:"GnomadSmall"}),(0,r.kt)("h3",{id:"mitomap"},"MITOMAP"),(0,r.kt)(N.default,{mdxType:"MitoMapSmall"}),(0,r.kt)("h3",{id:"primate-ai"},"Primate AI"),(0,r.kt)(s.default,{mdxType:"PrimateAI"}),(0,r.kt)("h3",{id:"revel"},"REVEL"),(0,r.kt)(u.default,{mdxType:"REVEL"}),(0,r.kt)("h3",{id:"splice-ai"},"Splice AI"),(0,r.kt)(k.default,{mdxType:"SpliceAI"}),(0,r.kt)("h3",{id:"topmed"},"TOPMed"),(0,r.kt)(j.default,{mdxType:"TOPMed"}),(0,r.kt)("h2",{id:"genes"},"Genes"),(0,r.kt)("p",null,"Nirvana repots gene annotations for all genes that have an overlapping variant with the exception of flanking variants (i.e. 
variants that only cause upstream_gene_variant or downstream_gene_variant)."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"genes":[\n {\n "name":"MSH6",\n "hgncId":7329,\n "summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",\n /* this is where gene-level data sources can be found e.g. 
OMIM */\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgncId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"summary"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"short description of the gene from ",(0,r.kt)("a",{parentName:"td",href:"https://www.omim.org/"},"OMIM"))))),(0,r.kt)("h3",{id:"omim"},"OMIM"),(0,r.kt)(A.default,{mdxType:"Omim"}),(0,r.kt)("h3",{id:"gnomad-lof-gene-metrics"},"gnomAD LoF Gene Metrics"),(0,r.kt)(h.default,{mdxType:"GnomadGeneLof"}),(0,r.kt)("h3",{id:"clingen-disease-validity"},"ClinGen Disease Validity"),(0,r.kt)(d.default,{mdxType:"ClinGenDiseaseValidity"}),(0,r.kt)("h3",{id:"cosmic-cancer-gene-census"},"COSMIC Cancer Gene Census"),(0,r.kt)(T.default,{mdxType:"COSMICCGC"}))}x.isMDXComponent=!0},95421:(t,e,a)=>{a.d(e,{Z:()=>n});const n=a.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/63aa7e0c.35c70851.js b/assets/js/63aa7e0c.35c70851.js deleted file mode 100644 index ac1f0d65..00000000 --- a/assets/js/63aa7e0c.35c70851.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3739,5062,692,357,1946,578,7857,2164,2020,2883,7751,8493,2116],{3905:(t,e,a)=>{a.d(e,{Zo:()=>d,kt:()=>k});var n=a(67294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),m=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},d=function(t){var e=m(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",g={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,d=o(t,["components","mdxType","originalType","parentName"]),s=m(a),N=r,k=s["".concat(p,".").concat(N)]||s[N]||g[N]||l;return a?n.createElement(k,i(i({ref:e},d),{},{components:a})):n.createElement(k,i({ref:e},d))}));function k(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=N;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[s]="string"==typeof t?t:r,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.14/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.14/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. Non-zero integer.")))))}s.isMDXComponent=!0},97118:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.14/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0},5044:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clinvar-json",id:"version-3.14/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/clinvar-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen 
IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no 
conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}s.isMDXComponent=!0},66916:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.14/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.14/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/dbsnp-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}s.isMDXComponent=!0},14466:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.14/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/gnomad-lof-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],m={toc:p},d="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pLi"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pNull"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pRec"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"synZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"misZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected missense Z 
score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}s.isMDXComponent=!0},82436:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.14/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n 
"maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coverage"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. 
Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}s.isMDXComponent=!0},4663:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.14/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}s.isMDXComponent=!0},24028:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.14/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":"3166",\n "end":"14152",\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n 
"annotationOverlap":0.42405\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}s.isMDXComponent=!0},27968:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/omim-json",id:"version-3.14/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/omim-json.md",tags:[],version:"3.14",frontMatter:{}},p=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}s.isMDXComponent=!0},39038:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/primate-ai-json",id:"version-3.14/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/primate-ai-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],m={toc:p},d="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}s.isMDXComponent=!0},54352:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/revel-json",id:"version-3.14/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/revel-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n 
"score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}s.isMDXComponent=!0},54886:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/splice-ai-json",id:"version-3.14/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/splice-ai-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}s.isMDXComponent=!0},17866:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>h,default:()=>w,frontMatter:()=>f,metadata:()=>y,toc:()=>b});var n=a(87462),r=(a(67294),a(3905)),l=a(5044),i=a(66916),o=a(39038),p=a(54352),m=a(54886),d=a(4663),s=a(24028),g=a(82436),N=a(14466),k=a(49082),c=a(97118),u=a(27968);const f={title:"Nirvana JSON File Format"},h=void 0,y={unversionedId:"file-formats/nirvana-json-file-format",id:"version-3.14/file-formats/nirvana-json-file-format",title:"Nirvana JSON File Format",description:"Overview",source:"@site/versioned_docs/version-3.14/file-formats/nirvana-json-file-format.mdx",sourceDirName:"file-formats",slug:"/file-formats/nirvana-json-file-format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/file-formats/nirvana-json-file-format",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/file-formats/nirvana-json-file-format.mdx",tags:[],version:"3.14",frontMatter:{title:"Nirvana JSON File Format"},sidebar:"version-3.14/docs",previous:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/splice-ai"},next:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/file-formats/custom-annotations"}},b=[{value:"Overview",id:"overview",children:[{value:"Conventions",id:"conventions",children:[],level:3},{value:"JSON Layout",id:"json-layout",children:[],level:3}],level:2},{value:"Header",id:"header",children:[{value:"Data 
Source",id:"data-source",children:[],level:4},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:4}],level:2},{value:"Positions",id:"positions",children:[{value:"1000 Genomes (SV)",id:"1000-genomes-sv",children:[],level:3},{value:"MITOMAP (SV)",id:"mitomap-sv",children:[],level:3}],level:2},{value:"Samples",id:"samples",children:[],level:2},{value:"Variants",id:"variants",children:[{value:"Transcripts",id:"transcripts",children:[{value:"PolyPhen",id:"polyphen",children:[],level:4},{value:"SIFT",id:"sift",children:[],level:4},{value:"Gene Fusions",id:"gene-fusions",children:[],level:4},{value:"Fusion",id:"fusion",children:[],level:4}],level:3},{value:"Regulatory Regions",id:"regulatory-regions",children:[{value:"Regulatory Types",id:"regulatory-types",children:[],level:4},{value:"Regulatory Consequences",id:"regulatory-consequences",children:[],level:4}],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3},{value:"1000 Genomes",id:"1000-genomes",children:[],level:3},{value:"gnomAD",id:"gnomad",children:[],level:3},{value:"dbSNP",id:"dbsnp",children:[],level:3},{value:"MITOMAP",id:"mitomap",children:[],level:3},{value:"Primate AI",id:"primate-ai",children:[],level:3},{value:"REVEL",id:"revel",children:[],level:3},{value:"Splice AI",id:"splice-ai",children:[],level:3}],level:2},{value:"Genes",id:"genes",children:[{value:"OMIM",id:"omim",children:[],level:3},{value:"gnomAD LoF Gene Metrics",id:"gnomad-lof-gene-metrics",children:[],level:3}],level:2}],v={toc:b},A="wrapper";function w(t){let{components:e,...f}=t;return(0,r.kt)(A,(0,n.Z)({},v,f,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("h3",{id:"conventions"},"Conventions"),(0,r.kt)("p",null,"In the Nirvana JSON representation, we try to maximize the amount of useful information that is relayed in the output file. 
As such, we have several conventions that are useful to know about:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display ",(0,r.kt)("inlineCode",{parentName:"li"},'"isStructuralVariant":false')," a few million times when annotating a small variant VCF."),(0,r.kt)("li",{parentName:"ul"},"When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. Nirvana treats periods like empty or null strings and therefore will not output those entries.")),(0,r.kt)("h3",{id:"json-layout"},"JSON Layout"),(0,r.kt)("p",null,(0,r.kt)("img",{src:a(14229).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"In general, each position corresponds to a row in the original VCF file."),(0,r.kt)("p",{parentName:"div"},"For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section."))),(0,r.kt)("h2",{id:"header"},"Header"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'{ \n "header":{ \n "annotator":"Nirvana 3.0.0-alpha.5+g6c52e247",\n "creationTime":"2017-06-14 15:53:13",\n "genomeAssembly":"GRCh37",\n "dataSources":[ 
\n { \n "name":"OMIM",\n "version":"unknown",\n "description":"An Online Catalog of Human Genes and Genetic Disorders",\n "releaseDate":"2017-05-03"\n },\n { \n "name":"VEP",\n "version":"84",\n "description":"BothRefSeqAndEnsembl",\n "releaseDate":"2017-01-16"\n },\n { \n "name":"ClinVar",\n "version":"20170503",\n "description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",\n "releaseDate":"2017-05-03"\n },\n { \n "name":"phyloP",\n "version":"hg19",\n "description":"46 way conservation score between humans and 45 other vertebrates",\n "releaseDate":"2009-11-10"\n }\n ],\n "samples":[ \n "NA12878",\n "NA12891",\n "NA12892"\n ]\n },\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotator"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the name of the annotator and the current version")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"creationTime"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd hh:mm:ss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genomeAssembly"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#genome-assemblies"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"schemaVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"incremented whenever the core structure of the JSON file introduces breaking changes")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#data-source"},"Data Source entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"samples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the order of these sample names will be used throughout the JSON file when enumerating samples")))),(0,r.kt)("h4",{id:"data-source"},"Data 
Source"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"version"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"optional description of the data source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"releaseDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")))),(0,r.kt)("h4",{id:"genome-assemblies"},"Genome Assemblies"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"GRCh37"),(0,r.kt)("li",{parentName:"ul"},"GRCh38"),(0,r.kt)("li",{parentName:"ul"},"hg19"),(0,r.kt)("li",{parentName:"ul"},"SARSCoV2")),(0,r.kt)("h2",{id:"positions"},"Positions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"positions":[ \n { \n "chromosome":"chr2",\n "position":48010488,\n "repeatUnit":"GGCCCC",\n "refRepeatCount":3,\n "svEnd":48020488,\n "refAllele":"G",\n "altAlleles":[ \n "A",\n "GT"\n ],\n "quality":461,\n "filters":[ \n "PASS"\n ],\n "ciPos":[ \n -170,\n 170\n ],\n "ciEnd":[ \n -175,\n 175\n ],\n "svLength":1000,\n "strandBias":1.23,\n "jointSomaticNormalQuality":29,\n 
"cytogeneticBand":"2p16.3",\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Variant Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"position"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (1-based notation). Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnit"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refRepeatCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by 
ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAlleles"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"quality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (Normally an integer, but some variant callers use floating point. 
Has been observed as high as 500k)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"filters"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svLength"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"strandBias"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"small variant"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by GATK (from SB)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"jointSomaticNormalQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by the Manta variant caller (SOMATICSCORE)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cytogeneticBand"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"e.g. 
17p13.1")))),(0,r.kt)("h3",{id:"1000-genomes-sv"},"1000 Genomes (SV)"),(0,r.kt)(c.default,{mdxType:"ThousandGenomesSV"}),(0,r.kt)("h3",{id:"mitomap-sv"},"MITOMAP (SV)"),(0,r.kt)(s.default,{mdxType:"MitoMapSV"}),(0,r.kt)("h2",{id:"samples"},"Samples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n "totalDepth":57,\n "genotypeQuality":12,\n "copyNumber":3,\n "repeatUnitCounts":[\n 10,\n 20\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "failedFilter":true,\n "splitReadCounts":[\n 10,\n 20\n ],\n "pairedEndReadCounts":[\n 10,\n 20\n ],\n "isDeNovo":true,\n "diseaseAffectedStatuses":[\n "-"\n ],\n "artifactAdjustedQualityScore":89.3,\n "likelihoodRatioQualityScore":78.2,\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotype"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantFrequencies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"totalDepth"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotypeQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values. Typically maxes out at 99")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"copyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnitCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleDepths"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"splitReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pairedEndReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDeNovo"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseaseAffectedStatuses"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"artifactAdjustedQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"likelihoodRatioQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 100. 2 decimal places. 
One value per alternate allele")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Empty Samples")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"If a sample does not contain any entries, we will create a sample object that contains the ",(0,r.kt)("inlineCode",{parentName:"p"},"isEmpty")," key. This ensures that sample ordering is preserved while indicating that a sample is intentionally empty."),(0,r.kt)("pre",{parentName:"div"},(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[ \n { \n "isEmpty":true\n }\n],\n')))),(0,r.kt)("h2",{id:"variants"},"Variants"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[ \n { \n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "isReferenceMinorAllele":true,\n "isStructuralVariant":true,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "isRecomposedVariant":true,\n "linkedVids":["2:48010488:GTA:ATC"],\n "hgvsg":"NC_000002.11:g.48010488G>A",\n 
"phylopScore":0.459\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"vid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"Variant Identifiers"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReferenceMinorAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a reference minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isStructuralVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a structural variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inLowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant lies in a low complexity region (gnomAD low complexity regions)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the reference allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the alternate allele.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"uses\xa0",(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"Sequence Ontology sequence alterations"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the decomposed variant has been used to create another recomposed 
variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isRecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is recomposed from two or more decomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"linkedVids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"list of ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"VIDs")," for variants connecting decomposed and recomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsg"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS g. notation")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phyloP conservation score. Range: -14.08 to 6.424")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Reference Minor Alleles")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Nirvana supports annotating reference minor alleles. 
In such a case, ",(0,r.kt)("inlineCode",{parentName:"p"},"refAllele")," will be replaced by the global major allele and ",(0,r.kt)("inlineCode",{parentName:"p"},"altAllele")," will be replaced with the original reference allele."))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Flagging Decomposed & Recomposed Variants")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isDecomposedVariant":true'),"."),(0,r.kt)("p",{parentName:"div"},"Similarly, the recomposed variant will be shown as a new VCF position. 
This recomposed variant will be flagged with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isRecomposedVariant":true'),"."))),(0,r.kt)("h3",{id:"transcripts"},"Transcripts"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"transcripts":[\n {\n "transcript":"ENST00000445503.1",\n "source":"Ensembl",\n "bioType":"nonsense_mediated_decay",\n "codons":"gGg/gAg",\n "aminoAcids":"G/E",\n "cdnaPos":"268",\n "cdsPos":"116",\n "exons":"1/9",\n "introns":"1/8",\n "proteinPos":"39",\n "geneId":"ENSG00000116062",\n "hgnc":"MSH6",\n "consequence":[\n "missense_variant",\n "NMD_transcript_variant"\n ],\n "hgvsc":"ENST00000445503.1:c.116G>A",\n "hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",\n "geneFusion":{\n "exon":6,\n "intron":5,\n "fusions":[\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",\n "exon":3,\n "intron":2\n },\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",\n "exon":2,\n "intron":1\n }\n ]\n },\n "isCanonical":true,\n "polyPhenScore":0.95,\n "polyPhenPrediction":"probably damaging",\n "proteinId":"ENSP00000405294.1",\n "siftScore":0.61,\n "siftPrediction":"tolerated",\n "completeOverlap":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript ID. e.g. 
ENST00000445503.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"source"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"RefSeq / Ensembl")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,r.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"codons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdnaPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdsPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exons affected by the variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"introns"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"introns affected by the 
variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/index.html"},"Sequence Ontology Consequences"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS protein nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneFusion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#gene-fusions"},"Gene Fusions entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isCanonical"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a canonical 
transcript")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#polyphen"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"protein ID. E.g. ENSP00000405294.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#sift"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"completeOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this transcript is completely overlapped by the variant")))),(0,r.kt)("h4",{id:"polyphen"},"PolyPhen"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"probably damaging"),(0,r.kt)("li",{parentName:"ul"},"possibly damaging"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"unknown")),(0,r.kt)("h4",{id:"sift"},"SIFT"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"tolerated"),(0,r.kt)("li",{parentName:"ul"},"deleterious"),(0,r.kt)("li",{parentName:"ul"},"tolerated - low 
confidence"),(0,r.kt)("li",{parentName:"ul"},"deleterious - low confidence")),(0,r.kt)("h4",{id:"gene-fusions"},"Gene Fusions"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"fusions"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#fusion"},"Fusion entry below"))))),(0,r.kt)("h4",{id:"fusion"},"Fusion"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the other breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the other breakpoint was 
located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature describing the two genes and the transcripts that are fused along with")))),(0,r.kt)("h3",{id:"regulatory-regions"},"Regulatory Regions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"regulatoryRegions":[ \n { \n "id":"ENSR00001542175",\n "type":"promoter",\n "consequence":[ \n "regulatory_region_variant"\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"type"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-types"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-consequences"},"possible values below"))))),(0,r.kt)("h4",{id:"regulatory-types"},"Regulatory 
Types"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CTCF_binding_site"),(0,r.kt)("li",{parentName:"ul"},"enhancer"),(0,r.kt)("li",{parentName:"ul"},"open_chromatin_region"),(0,r.kt)("li",{parentName:"ul"},"promoter"),(0,r.kt)("li",{parentName:"ul"},"promoter_flanking_region"),(0,r.kt)("li",{parentName:"ul"},"TF_binding_site")),(0,r.kt)("h4",{id:"regulatory-consequences"},"Regulatory Consequences"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"regulatory_region_variant"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_ablation"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_amplification"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_truncation")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)(l.default,{mdxType:"ClinVar"}),(0,r.kt)("h3",{id:"1000-genomes"},"1000 Genomes"),(0,r.kt)(k.default,{mdxType:"ThousandGenomesSmall"}),(0,r.kt)("h3",{id:"gnomad"},"gnomAD"),(0,r.kt)(g.default,{mdxType:"GnomadSmall"}),(0,r.kt)("h3",{id:"dbsnp"},"dbSNP"),(0,r.kt)(i.default,{mdxType:"DbSNP"}),(0,r.kt)("h3",{id:"mitomap"},"MITOMAP"),(0,r.kt)(d.default,{mdxType:"MitoMapSmall"}),(0,r.kt)("h3",{id:"primate-ai"},"Primate AI"),(0,r.kt)(o.default,{mdxType:"PrimateAI"}),(0,r.kt)("h3",{id:"revel"},"REVEL"),(0,r.kt)(p.default,{mdxType:"REVEL"}),(0,r.kt)("h3",{id:"splice-ai"},"Splice AI"),(0,r.kt)(m.default,{mdxType:"SpliceAI"}),(0,r.kt)("h2",{id:"genes"},"Genes"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"genes":[ \n { \n "name":"MSH6",\n "hgncId":7329,\n "summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. 
The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",\n /* this is where gene-level data sources can be found e.g. OMIM */\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgncId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"summary"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"short description of the gene from ",(0,r.kt)("a",{parentName:"td",href:"https://www.omim.org/"},"OMIM"))))),(0,r.kt)("h3",{id:"omim"},"OMIM"),(0,r.kt)(u.default,{mdxType:"Omim"}),(0,r.kt)("h3",{id:"gnomad-lof-gene-metrics"},"gnomAD LoF Gene Metrics"),(0,r.kt)(N.default,{mdxType:"GnomadGeneLof"}))}w.isMDXComponent=!0},14229:(t,e,a)=>{a.d(e,{Z:()=>n});const n=a.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/644aa76c.122c2d0f.js b/assets/js/644aa76c.122c2d0f.js new file mode 100644 index 
00000000..40d4267f --- /dev/null +++ b/assets/js/644aa76c.122c2d0f.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[216],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>f});var a=n(7294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),s=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=s(t.components);return a.createElement(p.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,i=t.originalType,p=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),c=s(n),u=r,f=c["".concat(p,".").concat(u)]||c[u]||d[u]||i;return n?a.createElement(f,o(o({ref:e},m),{},{components:n})):a.createElement(f,o({ref:e},m))}));function f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[c]="string"==typeof t?t:r,o[1]=l;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(7462),r=(n(7294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/omim-json",id:"data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/omim-json.md",tags:[],version:"current",frontMatter:{}},p=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],s={toc:p},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/644aa76c.8847e623.js b/assets/js/644aa76c.8847e623.js deleted file mode 100644 index 6c844acc..00000000 --- a/assets/js/644aa76c.8847e623.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[216],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return 
r}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),s=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=s(t.components);return a.createElement(p.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,i=t.originalType,p=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),c=s(n),u=r,f=c["".concat(p,".").concat(u)]||c[u]||d[u]||i;return n?a.createElement(f,o(o({ref:e},m),{},{components:n})):a.createElement(f,o({ref:e},m))}));function f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[c]="string"==typeof t?t:r,o[1]=l;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/omim-json",id:"data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/omim-json.md",tags:[],version:"current",frontMatter:{}},p=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],s={toc:p},m="wrapper";function 
c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to 
susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry 
below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was 
mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/64f4c861.e4ce1b02.js b/assets/js/64f4c861.e4ce1b02.js deleted file mode 100644 index 76175f6c..00000000 --- a/assets/js/64f4c861.e4ce1b02.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6786,692,578],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>h});var n=a(67294);function i(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(i[a]=e[a]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(i[a]=e[a])}return i}var s=n.createContext({}),m=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},d=function(e){var t=m(e.components);return n.createElement(s.Provider,{value:t},e.children)},p="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return 
n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),p=m(a),u=i,h=p["".concat(s,".").concat(u)]||p[u]||c[u]||r;return a?n.createElement(h,o(o({ref:t},d),{},{components:a})):n.createElement(h,o({ref:t},d))}));function h(e,t){var a=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=a.length,o=new Array(r);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:i,o[1]=l;for(var m=2;m{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.14/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.14",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"status"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}p.isMDXComponent=!0},24028:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.14/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.14",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":"3166",\n "end":"14152",\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n 
"annotationOverlap":0.42405\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"end"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}p.isMDXComponent=!0},65084:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>l,metadata:()=>m,toc:()=>d});var n=a(87462),i=(a(67294),a(3905)),r=a(4663),o=a(24028);const l={title:"MITOMAP"},s=void 0,m={unversionedId:"data-sources/mitomap",id:"version-3.14/data-sources/mitomap",title:"MITOMAP",description:"Overview",source:"@site/versioned_docs/version-3.14/data-sources/mitomap.mdx",sourceDirName:"data-sources",slug:"/data-sources/mitomap",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mitomap",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/mitomap.mdx",tags:[],version:"3.14",frontMatter:{title:"MITOMAP"},sidebar:"version-3.14/docs",previous:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mito-heteroplasmy"},next:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/omim"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Scraping HTML Pages",id:"scraping-html-pages",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Allele Parsing",id:"allele-parsing",children:[],level:4}],level:3}],level:2},{value:"PostgreSQL Dump File",id:"postgresql-dump-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[{value:"Small Variants",id:"small-variants",children:[],level:3},{value:"Structural Variants",id:"structural-variants",children:[],level:3}],level:2}],p={toc:d},c="wrapper";function 
u(e){let{components:t,...l}=e;return(0,i.kt)(c,(0,n.Z)({},p,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"MITOMAP provides a compendium of polymorphisms and mutations in human mitochondrial DNA."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. ",(0,i.kt)("em",{parentName:"p"},"Current Protocols in Bioinformatics")," 1(123):1.23.1-26 (2013). ",(0,i.kt)("a",{parentName:"p",href:"http://www.mitomap.org"},"http://www.mitomap.org")))),(0,i.kt)("h2",{id:"scraping-html-pages"},"Scraping HTML Pages"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"MITOMAP is unique in that it doesn't offer the data in a downloadable format. 
As a result, the annotation content in Nirvana is scraped from the following MITOMAP pages:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsControl"},"mtDNA Control Region Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsCoding"},"mtDNA Coding Region & RNA Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsRNA"},"Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsCodingControl"},"Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/DeletionsSingle"},"Reported mtDNA Deletions")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/InsertionsSimple"},"mtDNA Simple Insertions"))),(0,i.kt)("p",null,(0,i.kt)("img",{src:a(79744).Z})),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"Here's what the HTML code looks like:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-html"},"[\"582\",\"MT-TF\",\"Mitochondrial myopathy\",\"T582C\",\"tRNA Phe\",\"-\",\"+\",\"Reported\",\"72.90% \",\"0\",\"2\"],\n[\"583\",\"MT-TF\",\"MELAS / MM & EXIT\",\"G583A\",\"tRNA Phe\",\"-\",\"+\",\"Cfrm\",\"93.10% \",\"0\",\"3\"],\n")),(0,i.kt)("p",null,"We're mainly interested in the following columns (numbers indicate the HTML page 
above):"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Position",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Disease",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Nucleotide Change",(0,i.kt)("sup",null,"1,2")),(0,i.kt)("li",{parentName:"ul"},"Allele",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Homoplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Heteroplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Status",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"MitoTIP",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"GB Seqs FL(CR)",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Deletion Junction",(0,i.kt)("sup",null,"5")),(0,i.kt)("li",{parentName:"ul"},"Insert (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"Insert Point (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"References/Curated References",(0,i.kt)("sup",null,"1,2,3,4"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"MitoTIP")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The MitoTIP information is used to populate the ",(0,i.kt)("inlineCode",{parentName:"p"},"clinicalSignificance")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"scorePercentile"),' JSON keys. 
The "frequency alert" entries are skipped since it\'s not directly relevant to clinical significance.'))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Left alignment")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Variant Enumeration")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are ",(0,i.kt)("inlineCode",{parentName:"p"},"C-C(2-8)")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"A-AC or ACC"),". 
Alternate alleles containing IUPAC ambiguity codes are similarly enumerated."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Inversions")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"MITOMAP inversions are currently treated as MNVs."))),(0,i.kt)("h4",{id:"allele-parsing"},"Allele Parsing"),(0,i.kt)("p",null,"The following MITOMAP allele parsing conventions are supported:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"C123T"),(0,i.kt)("li",{parentName:"ul"},"16021_16022del"),(0,i.kt)("li",{parentName:"ul"},"8042del2"),(0,i.kt)("li",{parentName:"ul"},"C9537insC"),(0,i.kt)("li",{parentName:"ul"},"3902_3908invACCTTGC"),(0,i.kt)("li",{parentName:"ul"},"A-AC or ACC"),(0,i.kt)("li",{parentName:"ul"},"C-C(2-8)"),(0,i.kt)("li",{parentName:"ul"},"8042delAT")),(0,i.kt)("h2",{id:"postgresql-dump-file"},"PostgreSQL Dump File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;\n1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . 
Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177\n2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. 
The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534\n")),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"id"),(0,i.kt)("li",{parentName:"ul"},"nlmid")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Why not use the PostgreSQL file for everything?")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in."))),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 
1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Duplicated records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown."),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"For diseases and PubMed IDs, we take the union of the values in the duplicated records."),(0,i.kt)("li",{parentName:"ul"},"For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.")))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Skipped records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Records that represent an alternate notation of the original variant are skipped. 
Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped."))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"see ",(0,i.kt)("a",{parentName:"li",href:"#example"},"HTML Pages")," above"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/downloads/mitomap.dump.sql.gz"},"PostgreSQL dump file"))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("h3",{id:"small-variants"},"Small Variants"),(0,i.kt)(r.default,{mdxType:"SmallJSON"}),(0,i.kt)("h3",{id:"structural-variants"},"Structural Variants"),(0,i.kt)(o.default,{mdxType:"SVJSON"}))}u.isMDXComponent=!0},79744:(e,t,a)=>{a.d(t,{Z:()=>n});const n=a.p+"assets/images/MITOMAP-d8d4dd35c2336fdba5fcced77ec438e6.png"}}]); \ No newline at end of file diff --git a/assets/js/6555d2c3.3d341511.js b/assets/js/6555d2c3.3d341511.js deleted file mode 100644 index 107d0b8c..00000000 --- a/assets/js/6555d2c3.3d341511.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9482],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>d});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var i=r.createContext({}),m=function(t){var e=r.useContext(i),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=m(t.components);return 
r.createElement(i.Provider,{value:e},t.children)},s="mdxType",f={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,l=t.originalType,i=t.parentName,c=p(t,["components","mdxType","originalType","parentName"]),s=m(n),u=a,d=s["".concat(i,".").concat(u)]||s[u]||f[u]||l;return n?r.createElement(d,o(o({ref:e},c),{},{components:n})):r.createElement(d,o({ref:e},c))}));function d(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=u;var p={};for(var i in e)hasOwnProperty.call(e,i)&&(p[i]=e[i]);p.originalType=t,p[s]="string"==typeof t?t:a,o[1]=p;for(var m=2;m{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>p,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,p={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.21/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.21",frontMatter:{}},i=[],m={toc:i},c="wrapper";function s(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n 
"sasAc":170\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. 
Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. 
Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/65e83232.d96535c9.js b/assets/js/65e83232.d96535c9.js deleted file mode 100644 index bdd03d24..00000000 --- a/assets/js/65e83232.d96535c9.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[578],{3905:(t,e,r)=>{r.d(e,{Zo:()=>m,kt:()=>f});var n=r(67294);function a(t,e,r){return e in t?Object.defineProperty(t,e,{value:r,enumerable:!0,configurable:!0,writable:!0}):t[e]=r,t}function o(t,e){var r=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),r.push.apply(r,n)}return r}function i(t){for(var e=1;e=0||(a[r]=t[r]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,r)&&(a[r]=t[r])}return a}var p=n.createContext({}),c=function(t){var e=n.useContext(p),r=e;return t&&(r="function"==typeof t?t(e):i(i({},e),t)),r},m=function(t){var e=c(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},d=n.forwardRef((function(t,e){var r=t.components,a=t.mdxType,o=t.originalType,p=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),s=c(r),d=a,f=s["".concat(p,".").concat(d)]||s[d]||u[d]||o;return r?n.createElement(f,i(i({ref:e},m),{},{components:r})):n.createElement(f,i({ref:e},m))}));function f(t,e){var r=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=r.length,i=new Array(o);i[0]=d;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[s]="string"==typeof t?t:a,i[1]=l;for(var c=2;c{r.r(e),r.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>o,metadata:()=>l,toc:()=>p});var n=r(87462),a=(r(67294),r(3905));const 
o={},i=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.14/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],c={toc:p},m="wrapper";function s(t){let{components:e,...r}=t;return(0,a.kt)(m,(0,n.Z)({},c,r,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":"3166",\n "end":"14152",\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n 
}\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"end"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/661c0ea4.8feadb43.js b/assets/js/661c0ea4.8feadb43.js deleted file mode 100644 index a9006538..00000000 --- a/assets/js/661c0ea4.8feadb43.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9664,5062],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=d(n),u=r,v=p["".concat(s,".").concat(u)]||p[u]||m[u]||o;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var 
a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/revel-json",id:"version-3.14/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/revel-json.md",tags:[],version:"3.14",frontMatter:{}},s=[],d={toc:s},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}p.isMDXComponent=!0},43157:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(54352);const i={title:"REVEL"},l=void 
0,s={unversionedId:"data-sources/revel",id:"version-3.14/data-sources/revel",title:"REVEL",description:"Overview",source:"@site/versioned_docs/version-3.14/data-sources/revel.mdx",sourceDirName:"data-sources",slug:"/data-sources/revel",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/revel",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/revel.mdx",tags:[],version:"3.14",frontMatter:{title:"REVEL"},sidebar:"version-3.14/docs",previous:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/phylop"},next:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/splice-ai"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"CSV File",id:"csv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 
16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. ",(0,r.kt)("em",{parentName:"p"},"The American Journal of Human Genetics")," ",(0,r.kt)("strong",{parentName:"p"},"99"),", 877-885 (2016). ",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1016/j.ajhg.2016.08.016"},"https://doi.org/10.1016/j.ajhg.2016.08.016")))),(0,r.kt)("h2",{id:"csv-file"},"CSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL\n1,35142,35142,G,A,T,M,0.027\n1,35142,35142,G,C,T,R,0.035\n1,35142,35142,G,T,T,K,0.043\n1,35143,35143,T,A,T,S,0.018\n1,35143,35143,T,C,T,A,0.034\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"hg19_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"grch38_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"REVEL"))),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Since the input file contains positions for both GRCh37 and GRCh38, we split it into two ",(0,r.kt)("strong",{parentName:"p"},"TSV")," files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file."))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Conflicting Scores")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When there are multiple scores available for the same variant (i.e. 
the same position with the same alternative allele), we pick the highest score."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sites.google.com/site/revelgenomics/downloads"},"https://sites.google.com/site/revelgenomics/downloads")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/666ea911.15e873b8.js b/assets/js/666ea911.15e873b8.js new file mode 100644 index 00000000..ab13e574 --- /dev/null +++ b/assets/js/666ea911.15e873b8.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6635],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>f});var r=n(7294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function c(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var c=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=r.createContext({}),l=function(e){var t=r.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},u=function(e){var t=l(e.components);return r.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,c=e.originalType,s=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),p=l(n),d=a,f=p["".concat(s,".").concat(d)]||p[d]||m[d]||c;return n?r.createElement(f,o(o({ref:t},u),{},{components:n})):r.createElement(f,o({ref:t},u))}));function 
f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var c=n.length,o=new Array(c);o[0]=d;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[p]="string"==typeof e?e:a,o[1]=i;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>c,metadata:()=>i,toc:()=>s});var r=n(7462),a=(n(7294),n(3905));const c={},o=void 0,i={unversionedId:"data-sources/cosmic-cancer-gene-census",id:"data-sources/cosmic-cancer-gene-census",title:"cosmic-cancer-gene-census",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-cancer-gene-census.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-cancer-gene-census",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-cancer-gene-census",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-cancer-gene-census.md",tags:[],version:"current",frontMatter:{}},s=[],l={toc:s},u="wrapper";function p(e){let{components:t,...n}=e;return(0,a.kt)(u,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' {\n "name": "PRDM16",\n "hgncId": 14000,\n "ncbiGeneId": "63976",\n "ensemblGeneId": "ENSG00000142611",\n "cosmic": {\n "roleInCancer": [\n "oncogene",\n "fusion"\n ]\n }\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"roleInCancer"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Possible roles in caner")))))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/666ea911.7fc337cd.js 
b/assets/js/666ea911.7fc337cd.js deleted file mode 100644 index 480477ff..00000000 --- a/assets/js/666ea911.7fc337cd.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6635],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function c(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var c=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=r.createContext({}),l=function(e){var t=r.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},u=function(e){var t=l(e.components);return r.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,c=e.originalType,s=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),p=l(n),d=a,f=p["".concat(s,".").concat(d)]||p[d]||m[d]||c;return n?r.createElement(f,o(o({ref:t},u),{},{components:n})):r.createElement(f,o({ref:t},u))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var c=n.length,o=new Array(c);o[0]=d;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[p]="string"==typeof e?e:a,o[1]=i;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>c,metadata:()=>i,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const c={},o=void 
0,i={unversionedId:"data-sources/cosmic-cancer-gene-census",id:"data-sources/cosmic-cancer-gene-census",title:"cosmic-cancer-gene-census",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-cancer-gene-census.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-cancer-gene-census",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-cancer-gene-census",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-cancer-gene-census.md",tags:[],version:"current",frontMatter:{}},s=[],l={toc:s},u="wrapper";function p(e){let{components:t,...n}=e;return(0,a.kt)(u,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' {\n "name": "PRDM16",\n "hgncId": 14000,\n "ncbiGeneId": "63976",\n "ensemblGeneId": "ENSG00000142611",\n "cosmic": {\n "roleInCancer": [\n "oncogene",\n "fusion"\n ]\n }\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"roleInCancer"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Possible roles in caner")))))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/680e83ab.274d065b.js b/assets/js/680e83ab.274d065b.js deleted file mode 100644 index dc691208..00000000 --- a/assets/js/680e83ab.274d065b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7857],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),c=p(n),u=r,f=c["".concat(s,".").concat(u)]||c[u]||d[u]||i;return n?a.createElement(f,o(o({ref:t},m),{},{components:n})):a.createElement(f,o({ref:t},m))}));function f(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,o[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>c,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/omim-json",id:"version-3.14/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.14/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/omim-json.md",tags:[],version:"3.14",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],p={toc:s},m="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/6945.1c8247b7.js b/assets/js/6945.1c8247b7.js new file mode 100644 index 00000000..2a172f9d --- /dev/null +++ b/assets/js/6945.1c8247b7.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6945],{6945:(n,a,e)=>{e.r(a)}}]); \ No newline at end of file diff --git a/assets/js/6945.b80f5527.js b/assets/js/6945.b80f5527.js deleted file mode 100644 index 67cf3e1f..00000000 --- a/assets/js/6945.b80f5527.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6945],{46945:(n,a,e)=>{e.r(a)}}]); \ No newline at end of file diff --git a/assets/js/6a25488b.cc64bbb2.js b/assets/js/6a25488b.cc64bbb2.js 
deleted file mode 100644 index 0b244719..00000000 --- a/assets/js/6a25488b.cc64bbb2.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6923],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),c=p(n),u=r,f=c["".concat(s,".").concat(u)]||c[u]||d[u]||i;return n?a.createElement(f,o(o({ref:t},m),{},{components:n})):a.createElement(f,o({ref:t},m))}));function f(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,o[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>c,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/omim-json",id:"version-3.18/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.18/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/omim-json.md",tags:[],version:"3.18",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],p={toc:s},m="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/6ad9fc4a.3ecf05ae.js b/assets/js/6ad9fc4a.3ecf05ae.js deleted file mode 100644 index 23f653c6..00000000 --- a/assets/js/6ad9fc4a.3ecf05ae.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9962,1155],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>h});var o=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function a(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);t&&(o=o.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,o)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return 
r}(e,t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);for(o=0;o=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=o.createContext({}),p=function(e){var t=o.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=p(e.components);return o.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return o.createElement(o.Fragment,{},t)}},m=o.forwardRef((function(e,t){var n=e.components,r=e.mdxType,a=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=p(n),m=r,h=d["".concat(s,".").concat(m)]||d[m]||u[m]||a;return n?o.createElement(h,i(i({ref:t},c),{},{components:n})):o.createElement(h,i({ref:t},c))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var a=n.length,i=new Array(a);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,i[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>d,frontMatter:()=>a,metadata:()=>l,toc:()=>s});var o=n(87462),r=(n(67294),n(3905));const a={},i=void 0,l={unversionedId:"data-sources/phylop-json",id:"version-3.14/data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/phylop-json.md",tags:[],version:"3.14",frontMatter:{}},s=[],p={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,o.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n 
"chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] \n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}d.isMDXComponent=!0},99636:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>p});var o=n(87462),r=(n(67294),n(3905)),a=n(13e3);const i={title:"PhyloP"},l=void 0,s={unversionedId:"data-sources/phylop",id:"version-3.14/data-sources/phylop",title:"PhyloP",description:"Overview",source:"@site/versioned_docs/version-3.14/data-sources/phylop.mdx",sourceDirName:"data-sources",slug:"/data-sources/phylop",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/phylop",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/phylop.mdx",tags:[],version:"3.14",frontMatter:{title:"PhyloP"},sidebar:"version-3.14/docs",previous:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/primate-ai"},next:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/revel"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"WigFix File",id:"wigfix-file",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:p},d="wrapper";function 
u(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,o.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"PhyloP (phylogenetic p-values) conservation scores are obtained from the ","[PHAST package]"," (",(0,r.kt)("a",{parentName:"p",href:"http://compgen.bscb.cornell.edu/phast/"},"http://compgen.bscb.cornell.edu/phast/"),") for multiple alignments of vertebrate genomes to the human genome. For GRCh38, the multiple alignments are against 19 mammals and for GRCh37, it is against 45 vertebrate genomes."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. ",(0,r.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. 
(",(0,r.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,r.kt)("h2",{id:"wigfix-file"},"WigFix File"),(0,r.kt)("p",null,"The data is provided in WigFix files which is a text file that provides conservation scores for contiguous intervals in the following format:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"fixedStep chrom=chr1 start=10918 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.064\nfixedStep chrom=chr1 start=34045 step=1\n0.111\n0.100\n0.111\n0.111\n0.100\n0.111\n0.111\n0.111\n0.100\n0.111\n-1.636\n")),(0,r.kt)("p",null,"We convert them to binary files with indexes for fast query. Note that these are scores for genomic positions and are reported only for SNVs."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,"GRCh37: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/"},"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/")),(0,r.kt)("p",null,"GRCh38: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/"},"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Unlike other supplemetary datasources, phyloP scores are reported in the variants section."),(0,r.kt)(a.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/6b535d84.2d58d164.js b/assets/js/6b535d84.2d58d164.js deleted file mode 100644 index a23be838..00000000 --- a/assets/js/6b535d84.2d58d164.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1419],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(67294);function r(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),m=c(n),u=r,v=m["".concat(s,".").concat(u)]||m[u]||d[u]||i;return n?a.createElement(v,l(l({ref:t},p),{},{components:n})):a.createElement(v,l({ref:t},p))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,l=new Array(i);l[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:r,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={title:"Variant IDs"},l=void 0,o={unversionedId:"core-functionality/variant-ids",id:"version-3.16/core-functionality/variant-ids",title:"Variant 
IDs",description:"Overview",source:"@site/versioned_docs/version-3.16/core-functionality/variant-ids.md",sourceDirName:"core-functionality",slug:"/core-functionality/variant-ids",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/variant-ids",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/core-functionality/variant-ids.md",tags:[],version:"3.16",frontMatter:{title:"Variant IDs"},sidebar:"version-3.16/docs",previous:{title:"MNV Recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/mnv-recomposition"},next:{title:"Jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/utilities/jasix"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF Examples",id:"vcf-examples",children:[],level:3},{value:"Format",id:"format",children:[],level:3},{value:"VID Examples",id:"vid-examples",children:[],level:3}],level:2},{value:"Translocation Breakends",id:"translocation-breakends",children:[{value:"VCF Example",id:"vcf-example",children:[],level:3},{value:"Format",id:"format-1",children:[],level:3},{value:"VID Example",id:"vid-example",children:[],level:3}],level:2},{value:"All Other Structural Variants",id:"all-other-structural-variants",children:[{value:"VCF Examples",id:"vcf-examples-1",children:[],level:3},{value:"Format",id:"format-2",children:[],level:3},{value:"VID Examples",id:"vid-examples-1",children:[],level:3}],level:2}],c={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Many downstream tools use a variant identifier to store annotation results. 
We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute."),(0,r.kt)("p",null,"The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. Our VID scheme attempts to fill that gap."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Conventions")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("ul",{parentName:"div"},(0,r.kt)("li",{parentName:"ul"},"all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)"),(0,r.kt)("li",{parentName:"ul"},"for a reference variant (i.e. no alt allele), replace the period (.) with the reference base"),(0,r.kt)("li",{parentName:"ul"},"padding bases are used, neither the reference nor alternate allele can be empty"),(0,r.kt)("li",{parentName:"ul"},"some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base")))),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-examples"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 66507 . T A 184.45 PASS .\nchr1 66521 . T TATATA 144.53 PASS .\nchr1 66572 . 
GTA G,GTACTATATATTATA 45.45 PASS .\n")),(0,r.kt)("h3",{id:"format"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-examples"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-66507-T-A"),(0,r.kt)("li",{parentName:"ul"},"1-66521-T-TATATA"),(0,r.kt)("li",{parentName:"ul"},"1-66572-GTA-G"),(0,r.kt)("li",{parentName:"ul"},"1-66572-G-GTACTATATATTA")),(0,r.kt)("h2",{id:"translocation-breakends"},"Translocation Breakends"),(0,r.kt)("h3",{id:"vcf-example"},"VCF Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 2617277 . A AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[ . PASS SVTYPE=BND\n")),(0,r.kt)("h3",{id:"format-1"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-example"},"VID Example"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[")),(0,r.kt)("h2",{id:"all-other-structural-variants"},"All Other Structural Variants"),(0,r.kt)("h3",{id:"vcf-examples-1"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 1000 . G . PASS END=3001000;SVTYPE=ROH\nchr1 1350082 . G . PASS END=1351320;SVTYPE=DEL\nchr1 1477854 . C . PASS END=1477984;SVTYPE=DUP\nchr1 1477968 . T . PASS END=1477968;SVTYPE=INS\nchr1 1715898 . N . PASS SVTYPE=CNV;END=1750149\nchr1 2650426 . N . PASS SVTYPE=CNV;END=2653074\nchr2 321682 . T . PASS SVTYPE=INV;END=421681\nchr20 2633403 . G . 
PASS END=2633421\n")),(0,r.kt)("h3",{id:"format-2"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"end position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"SVTYPE")),(0,r.kt)("h3",{id:"vid-examples-1"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-1000-3001000-G-","<","ROH",">","-ROH"),(0,r.kt)("li",{parentName:"ul"},"1-1350082-1351320-G-","<","DEL",">","-DEL"),(0,r.kt)("li",{parentName:"ul"},"1-1477854-1477984-C-","<","DUP:TANDEM",">","-DUP"),(0,r.kt)("li",{parentName:"ul"},"1-1477968-1477968-T-","<","INS",">","-INS"),(0,r.kt)("li",{parentName:"ul"},"1-1715898-1750149-A-","<","DUP",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(replace the N with A)")),(0,r.kt)("li",{parentName:"ul"},"1-2650426-2653074-N-","<","DEL",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(keep the N)")),(0,r.kt)("li",{parentName:"ul"},"2-321682-421681-T-","<","INV",">","-INV"),(0,r.kt)("li",{parentName:"ul"},"20-2633403-2633421-G-","<","STR2",">","-STR")))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/6bd48569.e2c5cf15.js b/assets/js/6bd48569.e2c5cf15.js deleted file mode 100644 index 2acdf96b..00000000 --- a/assets/js/6bd48569.e2c5cf15.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9311,2439,7043],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>N});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},u=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},c=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,p=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),m=s(n),c=r,N=m["".concat(p,".").concat(c)]||m[c]||d[c]||l;return n?a.createElement(N,o(o({ref:t},u),{},{components:n})):a.createElement(N,o({ref:t},u))}));function N(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[m]="string"==typeof e?e:r,o[1]=i;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.16/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],s={toc:p},u="wrapper";function 
m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}m.isMDXComponent=!0},74146:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.16/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}m.isMDXComponent=!0},58763:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>p,default:()=>c,frontMatter:()=>i,metadata:()=>s,toc:()=>u});var a=n(87462),r=(n(67294),n(3905)),l=n(43853),o=n(74146);const i={title:"1000 Genomes"},p=void 0,s={unversionedId:"data-sources/1000Genomes",id:"version-3.16/data-sources/1000Genomes",title:"1000 Genomes",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/1000Genomes.mdx",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/1000Genomes",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/1000Genomes.mdx",tags:[],version:"3.16",frontMatter:{title:"1000 Genomes"},sidebar:"version-3.16/docs",previous:{title:"Annotating COVID-19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/covid19"},next:{title:"Amino Acid Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/amino-acid-conservation"}},u=[{value:"Overview",id:"overview",children:[],level:2},{value:"Populations",id:"populations",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing",children:[{value:"Conflict Resolution",id:"conflict-resolution",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Structural Variants",id:"structural-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing-1",children:[],level:3},{value:"Converting VCF svTypes to SO sequence 
alterations",id:"converting-vcf-svtypes-to-so-sequence-alterations",children:[{value:"Exceptions",id:"exceptions",children:[],level:4}],level:3}],level:2},{value:"JSON Output",id:"json-output-1",children:[],level:2}],m={toc:u},d="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. ",(0,r.kt)("em",{parentName:"p"},"Nature 526"),", 75\u201381 (2015). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/nature15394"},"https://doi.org/10.1038/nature15394")))),(0,r.kt)("h2",{id:"populations"},"Populations"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"The super population membership can be found here: (",(0,r.kt)("a",{parentName:"li",href:"http://www.1000genomes.org/category/population/"},"http://www.1000genomes.org/category/population/"),")"),(0,r.kt)("li",{parentName:"ul"},"We want to capture the allele frequencies for all 26 populations as well as the 5 super populations and the total population.")),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing"},"VCF File Parsing"),(0,r.kt)("p",null,"The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. 
Our team converted the original data to VCF entries with allele counts and allele numbers like the following."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633\n")),(0,r.kt)("p",null,"The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored)."),(0,r.kt)("p",null,"We parse the VCF file and extract the following fields from INFO:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"AA"),(0,r.kt)("li",{parentName:"ul"},"AC"),(0,r.kt)("li",{parentName:"ul"},"AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AN"),(0,r.kt)("li",{parentName:"ul"},"AMR_AN"),(0,r.kt)("li",{parentName:"ul"},"AFR_AN"),(0,r.kt)("li",{parentName:"ul"},"EUR_AN"),(0,r.kt)("li",{parentName:"ul"},"SAS_AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AC"),(0,r.kt)("li",{parentName:"ul"},"AMR_AC"),(0,r.kt)("li",{parentName:"ul"},"AFR_AC"),(0,r.kt)("li",{parentName:"ul"},"EUR_AC"),(0,r.kt)("li",{parentName:"ul"},"SAS_AC")),(0,r.kt)("h4",{id:"conflict-resolution"},"Conflict Resolution"),(0,r.kt)("p",null,"We have observed conflicting allele frequency information in the source. Take the following example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;\n1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;\n")),(0,r.kt)("p",null,"That is, the variant 1-20505705-C-CTG has conflicting entries. 
To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Chromosome"),(0,r.kt)("th",{parentName:"tr",align:"left"},"#"," of alleles"),(0,r.kt)("th",{parentName:"tr",align:"center"},"#"," of conflicting alleles"),(0,r.kt)("th",{parentName:"tr",align:"left"},"percentage"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"chrX"),(0,r.kt)("td",{parentName:"tr",align:"left"},"834800"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2733"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.33%")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"Total"),(0,r.kt)("td",{parentName:"tr",align:"left"},"21413098"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2743"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.013%")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Currently"),", we removed the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep allele frequencies of all other alleles in the VCF line."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Potential Alternate Solutions")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Remove all alleles that are contained in the vcf lines which have conflicting allele. 
(Recommended by 1000 genome group Holly Zheng-Bradley, 7/29/2015)"),(0,r.kt)("li",{parentName:"ul"},"Recalculate the allele frequency for the conflicting allele."),(0,r.kt)("li",{parentName:"ul"},"Pick the allele frequency that has the highest data support.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/"},"GRCh37"),"\n",(0,r.kt)("a",{parentName:"p",href:"http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/"},"GRCh38")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSONSNV"}),(0,r.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing-1"},"VCF File Parsing"),(0,r.kt)("p",null,"The VCF files contain entries like the following:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A ,,, 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4\n")),(0,r.kt)("p",null,"Please note that, CNVs are allele-specific. 
For example, HG00096 is effectively copy number 4, which would be a net gain on chr22."),(0,r.kt)("p",null,"1000 Genomes contains 5 types of structural variants:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CNV"),(0,r.kt)("li",{parentName:"ul"},"DEL"),(0,r.kt)("li",{parentName:"ul"},"DUP"),(0,r.kt)("li",{parentName:"ul"},"INS"),(0,r.kt)("li",{parentName:"ul"},"INV")),(0,r.kt)("p",null,"Since data of 1000 genomes is provided in VCF format, we assume that the coordinates follow the vcf format, i.e., there is a padding base for symbolic alleles. So all the interval can be interpreted as ","[BEGIN+1, END]",".\nSimilarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Insertion issues")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"END = BEGIN for 6/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+2 for 93/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+3 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+4 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END \u2013 BEGIN range from 5 to 1156 for others.")),(0,r.kt)("h3",{id:"converting-vcf-svtypes-to-so-sequence-alterations"},"Converting VCF svTypes to SO sequence alterations"),(0,r.kt)("p",null,"The svType will be captured in our JSON file under the ",(0,r.kt)("a",{parentName:"p",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"sequenceAlteration")," key. 
Here's the translation we'll use according to svType in 1000 Genomes."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"svType"),(0,r.kt)("th",{parentName:"tr",align:null},"Alternative Alleles contain "),(0,r.kt)("th",{parentName:"tr",align:null},"sequenceAlteration"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"ALU"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DUP"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"CNV"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain (observed_gains >0 and observed_losses =0) ",(0,r.kt)("br",null),"copy_number_loss\xa0(observed_gains = 0 and observed_losses > 0) ",(0,r.kt)("br",null),"copy_number_variation 
(otherwise)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DEL"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_loss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"LINE1"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"SVA"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INV"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"inversion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INS"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"insertion")))),(0,r.kt)("h4",{id:"exceptions"},"Exceptions"),(0,r.kt)("p",null,(0,r.kt)("em",{parentName:"p"},"We discard structural variants without END")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n21 9495848 esv3646347 A 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0\n")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"CNVs in chrY")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"No other types of structural variants exist in chrY"),(0,r.kt)("li",{parentName:"ul"},'Since copy number is provided in genotype field, we directly parse the copy number from "CN" field.'),(0,r.kt)("li",{parentName:"ul"},"For most CNVs in chrY, the reference copy 
number is 1, but the refence number for CNVs in segmental duplication sites is 2 ("," in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG00105 HG00107 HG00108\nY 2888555 CNV_Y_2888555_3014661 T 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394\nY 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C , 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99\n")),(0,r.kt)("h2",{id:"json-output-1"},"JSON Output"),(0,r.kt)(o.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/6f0414fb.6a409972.js b/assets/js/6f0414fb.6a409972.js deleted file mode 100644 index 7b0adc30..00000000 --- a/assets/js/6f0414fb.6a409972.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5508],{3905:(e,n,t)=>{t.d(n,{Zo:()=>u,kt:()=>h});var a=t(67294);function i(e,n,t){return n in 
e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function r(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):r(r({},n),e)),t},u=function(e){var n=c(e.components);return a.createElement(s.Provider,{value:n},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),p=c(t),m=i,h=p["".concat(s,".").concat(m)]||p[m]||d[m]||o;return t?a.createElement(h,r(r({ref:n},u),{},{components:t})):a.createElement(h,r({ref:n},u))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var o=t.length,r=new Array(o);r[0]=m;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[p]="string"==typeof e?e:i,r[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>r,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=t(87462),i=(t(67294),t(3905));const o={title:"Annotating COVID-19"},r=void 0,l={unversionedId:"introduction/covid19",id:"version-3.16/introduction/covid19",title:"Annotating COVID-19",description:"The Nirvana development team is mainly focused on providing annotations for the human genome. 
This focus allows us to maximize our resources towards understanding human health.",source:"@site/versioned_docs/version-3.16/introduction/covid19.md",sourceDirName:"introduction",slug:"/introduction/covid19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/covid19",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/introduction/covid19.md",tags:[],version:"3.16",frontMatter:{title:"Annotating COVID-19"},sidebar:"version-3.16/docs",previous:{title:"Parsing Nirvana JSON",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/parsing-json"},next:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/1000Genomes"}},s=[{value:"Getting Nirvana",id:"getting-nirvana",children:[],level:2},{value:"Downloading the COVID-19 data files",id:"downloading-the-covid-19-data-files",children:[],level:2},{value:"Download a COVID-19 VCF file",id:"download-a-covid-19-vcf-file",children:[],level:2},{value:"Running Nirvana",id:"running-nirvana",children:[],level:2},{value:"Investigating the Results",id:"investigating-the-results",children:[],level:2}],c={toc:s},u="wrapper";function p(e){let{components:n,...t}=e;return(0,i.kt)(u,(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health."),(0,i.kt)("p",null,"However, nothing in our architecture prevents us from supporting other genomes. 
Earlier this year, we had an opportunity to put that statement to the test - we added support for annotating the ",(0,i.kt)("strong",{parentName:"p"},"SARS-CoV-2")," genome, the virus that causes the ",(0,i.kt)("strong",{parentName:"p"},"COVID-19")," disease."),(0,i.kt)("p",null,"In addition to normal transcript annotation, we also supply:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"allele frequencies"),(0,i.kt)("li",{parentName:"ul"},"protein domains")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"SARS-CoV-2 Galaxy Project")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The allele frequencies used by Nirvana were provided by the ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/galaxyproject/SARS-CoV-2"},"SARS-CoV-2 Galaxy Project"),". 
This is an international effort that provides ongoing analysis of COVID-19 using Galaxy, BioConda, and public research infrastructures."))),(0,i.kt)("h2",{id:"getting-nirvana"},"Getting Nirvana"),(0,i.kt)("p",null,"If you don't have Nirvana already, please consult our ",(0,i.kt)("a",{parentName:"p",href:"getting-started"},"Getting Started")," page first."),(0,i.kt)("h2",{id:"downloading-the-covid-19-data-files"},"Downloading the COVID-19 data files"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip"},"a data zip file")," containing new gene models, reference, and external data sources for SARS-CoV-2:"),(0,i.kt)("p",null,"Just go to the directory that contains your Nirvana ",(0,i.kt)("inlineCode",{parentName:"p"},"Data")," directory."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"cd ~/Nirvana\ncurl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Data.zip\nunzip Covid19Data.zip\n")),(0,i.kt)("h2",{id:"download-a-covid-19-vcf-file"},"Download a COVID-19 VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz"},"a COVID-19 VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.vcf.gz\n")),(0,i.kt)("h2",{id:"running-nirvana"},"Running Nirvana"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/Nirvana.dll \\\n -c Data/Cache/SARS-CoV-2/SARS-CoV-2 \\\n --sd Data/SupplementaryAnnotation/SARS-CoV-2 \\\n -r Data/References/SARS-CoV-2.ASM985889v3.dat \\\n -i Covid19Mutations.vcf.gz \\\n -o 
Covid19Mutations\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache prefix"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:00.0\nSA Position Scan 00:00:00.0 1763\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nNC_045512 00:00:00.0 00:00:00.1 173\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:00.0 2.0 %\nPreload 00:00:00.0 0.3 %\nAnnotation 00:00:00.1 6.0 %\n\nTime: 00:00:01.5\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"Covid19Mutations.json.gz"),". 
Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/Covid19Mutations.json.gz"},"the full JSON file"),"."),(0,i.kt)("h2",{id:"investigating-the-results"},"Investigating the Results"),(0,i.kt)("p",null,"Here's an example of what a COVID-19 variant looks like in the JSON output:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "chromosome":"NC_045512.2",\n "position":27323,\n "refAllele":"C",\n "altAlleles":[\n "T"\n ],\n "filters":[\n "PASS"\n ],\n "proteinDomains":[\n {\n "start":27202,\n "end":27384,\n "proteinId":"YP_009724394.1",\n "domainId":"cl13556",\n "domainName":"Sars6 super family",\n "reciprocalOverlap":0.00546,\n "annotationOverlap":0.00546\n }\n ],\n "variants":[\n {\n "vid":"NC_045512.2-27323-C-T",\n "chromosome":"NC_045512.2",\n "begin":27323,\n "end":27323,\n "refAllele":"C",\n "altAllele":"T",\n "variantType":"SNV",\n "hgvsg":"NC_045512.2:g.27323C>T",\n "alleleFrequency":{\n "refAllele":"C",\n "altAllele":"T",\n "allAc":8,\n "allAn":1058,\n "allAf":0.007561\n },\n "transcripts":[\n {\n "transcript":"YP_009724394.1",\n "source":"RefSeq",\n "bioType":"protein_coding",\n "codons":"tCt/tTt",\n "aminoAcids":"S/F",\n "cdnaPos":"122",\n "cdsPos":"122",\n "exons":"1/1",\n "proteinPos":"41",\n "geneId":"43740572",\n "hgnc":"ORF6",\n "consequence":[\n "missense_variant"\n ],\n "hgvsc":"YP_009724394.1:c.122C>T",\n "hgvsp":"YP_009724394.1:p.(Ser41Phe)",\n "proteinId":"YP_009724394.1"\n },\n {\n "transcript":"YP_009724395.1",\n "source":"RefSeq",\n "bioType":"protein_coding",\n "geneId":"43740573",\n "hgnc":"ORF7a",\n "consequence":[\n "upstream_gene_variant"\n ],\n "proteinId":"YP_009724395.1"\n }\n ]\n }\n ]\n}\n')))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7064adfe.3f33a2cc.js b/assets/js/7064adfe.3f33a2cc.js deleted file mode 100644 index 94ae1373..00000000 --- a/assets/js/7064adfe.3f33a2cc.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4592],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>k});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var o=a.createContext({}),s=function(t){var e=a.useContext(o),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=s(t.components);return a.createElement(o.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,o=t.parentName,m=p(t,["components","mdxType","originalType","parentName"]),c=s(n),u=r,k=c["".concat(o,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(k,i(i({ref:e},m),{},{components:n})):a.createElement(k,i({ref:e},m))}));function k(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,i=new Array(l);i[0]=u;var p={};for(var o in e)hasOwnProperty.call(e,o)&&(p[o]=e[o]);p.originalType=t,p[c]="string"==typeof t?t:r,i[1]=p;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>l,metadata:()=>p,toc:()=>o});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,p={unversionedId:"data-sources/clinvar-json",id:"version-3.17/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.17/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clinvar-json.md",tags:[],version:"3.17",frontMatter:{}},o=[],s={toc:o},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV 
ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single 
variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/711e2eca.60c4ac08.js b/assets/js/711e2eca.60c4ac08.js deleted file mode 100644 index 692642c0..00000000 --- a/assets/js/711e2eca.60c4ac08.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5578],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var 
r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function i(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var s=r.createContext({}),p=function(t){var e=r.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=p(t.components);return r.createElement(s.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,o=t.originalType,s=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),c=p(n),u=a,g=c["".concat(s,".").concat(u)]||c[u]||d[u]||o;return n?r.createElement(g,i(i({ref:e},m),{},{components:n})):r.createElement(g,i({ref:e},m))}));function g(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=n.length,i=new Array(o);i[0]=u;var l={};for(var s in e)hasOwnProperty.call(e,s)&&(l[s]=e[s]);l.originalType=t,l[c]="string"==typeof t?t:a,i[1]=l;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/cosmic-gene-fusion-json",id:"version-3.21/data-sources/cosmic-gene-fusion-json",title:"cosmic-gene-fusion-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/cosmic-gene-fusion-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-gene-fusion-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cosmic-gene-fusion-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/cosmic-gene-fusion-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],p={toc:s},m="wrapper";function c(t){let{components:e,...n}=t;return(0,a.kt)(m,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' "cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary gland (submandibular)",\n "numSamples":1\n },\n {\n "name":"salivary gland (parotid)",\n "numSamples":1\n },\n {\n "name":"salivary gland (nasal cavity)",\n "numSamples":1\n },\n {\n "name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 19841262\n ]\n }\n ]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"id"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion 
ID")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,a.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,a.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")))),(0,a.kt)("p",null,(0,a.kt)("strong",{parentName:"p"},"Count")),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"name"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"description")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"})))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/71b515b1.0ae6d8f5.js b/assets/js/71b515b1.0ae6d8f5.js deleted file mode 100644 index 1141b28e..00000000 --- a/assets/js/71b515b1.0ae6d8f5.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7145,7199],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>h});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},u=function(e){var t=d(e.components);return 
a.createElement(s.Provider,{value:t},e.children)},c="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,u=l(e,["components","mdxType","originalType","parentName"]),c=d(n),m=r,h=c["".concat(s,".").concat(m)]||c[m]||p[m]||o;return n?a.createElement(h,i(i({ref:t},u),{},{components:n})):a.createElement(h,i({ref:t},u))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/gerp-json",id:"version-3.21/data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gerp-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],d={toc:s},u="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 
1.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to +\u221e")))))}c.isMDXComponent=!0},19568:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(63365);const i={title:"GERP"},l=void 0,s={unversionedId:"data-sources/gerp",id:"version-3.21/data-sources/gerp",title:"GERP",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/gerp.mdx",sourceDirName:"data-sources",slug:"/data-sources/gerp",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gerp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gerp.mdx",tags:[],version:"3.21",frontMatter:{title:"GERP"},sidebar:"docs",previous:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/fusioncatcher"},next:{title:"GME Variome",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gme"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Source Files",id:"source-files",children:[{value:"Example GRCh37",id:"example-grch37",children:[],level:3},{value:"Example GRCh38",id:"example-grch38",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[{value:"GRCh37",id:"grch37",children:[],level:3},{value:"GRCh38",id:"grch38",children:[],level:3}],level:2},{value:"JSON 
Output",id:"json-output",children:[],level:2}],u={toc:d},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"GERP identifies constrained elements in multiple alignments by quantifying substitution deficits.\nThese deficits represent substitutions that would have occurred if the element were neutral DNA, but did not occur because the element has been under functional constraint (Rejected Substitutions).\nNirvana uses GERP++ which is based on a significantly faster and more statistically robust maximum likelihood estimation procedure to compute expected rates of evolution."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},'Davydov, Eugene V., et al. "Identifying a high fraction of the human genome to be under selective constraint using GERP++." ',(0,r.kt)("em",{parentName:"p"},"PLoS computational biology")," ",(0,r.kt)("strong",{parentName:"p"},"6.12")," e1001025 (2010). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1371/journal.pcbi.1001025"},"https://doi.org/10.1371/journal.pcbi.1001025")))),(0,r.kt)("h2",{id:"source-files"},"Source Files"),(0,r.kt)("h3",{id:"example-grch37"},"Example GRCh37"),(0,r.kt)("p",null,"GRCh37 file is a TSV format"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr position GERP\n1 12177 0.83\n1 12178 -0.206\n1 12179 -0.492\n1 12180 -1.66\n1 12181 0.83\n1 12182 0.83\n1 12183 -0.417\n1 12184 0.83\n")),(0,r.kt)("h3",{id:"example-grch38"},"Example GRCh38"),(0,r.kt)("p",null,"GRCh38 file is a lift-over BED format"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr pos_start pos_end GERP\n1 12646 12647 0.298\n1 12647 12648 2.63\n1 12648 12649 1.87\n1 12649 12650 0.252\n1 12650 12651 -2.06\n1 12651 12652 2.61\n1 12652 12653 3.97\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we are interested in columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"position")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"GERP"))),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("p",null,"None"),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("h3",{id:"grch37"},"GRCh37"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html"},"http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html")),(0,r.kt)("h3",{id:"grch38"},"GRCh38"),(0,r.kt)("p",null,"The data is not available for GRCh38 on GERP++ website, and was obtained from ",(0,r.kt)("a",{parentName:"p",href:"https://personal.broadinstitute.org/konradk/loftee_data/GRCh38/"},"https://personal.broadinstitute.org/konradk/loftee_data/GRCh38/")),(0,r.kt)("h2",{id:"json-output"},"JSON 
Output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/71ee411d.622f3d38.js b/assets/js/71ee411d.622f3d38.js deleted file mode 100644 index 2afbf491..00000000 --- a/assets/js/71ee411d.622f3d38.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2155],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),m=d(n),h=i,u=m["".concat(s,".").concat(h)]||m[h]||c[h]||r;return n?a.createElement(u,o(o({ref:t},p),{},{components:n})):a.createElement(u,o({ref:t},p))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[m]="string"==typeof e?e:i,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var 
a=n(87462),i=(n(67294),n(3905));const r={title:"Mitochondrial Heteroplasmy"},o=void 0,l={unversionedId:"data-sources/mito-heteroplasmy",id:"version-3.17/data-sources/mito-heteroplasmy",title:"Mitochondrial Heteroplasmy",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/mito-heteroplasmy.md",sourceDirName:"data-sources",slug:"/data-sources/mito-heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mito-heteroplasmy",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/mito-heteroplasmy.md",tags:[],version:"3.17",frontMatter:{title:"Mitochondrial Heteroplasmy"},sidebar:"version-3.17/docs",previous:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/gnomad"},next:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mitomap"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"JSON File",id:"json-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Binning VRF Data",id:"binning-vrf-data",children:[],level:4},{value:"Pre-processing the Data",id:"pre-processing-the-data",children:[],level:4},{value:"Algorithm",id:"algorithm",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. 
The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline."),(0,i.kt)("h2",{id:"json-file"},"JSON File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "T:C":{\n "ad":[\n 1,\n 1,\n 1,\n 1,\n 1,\n 1\n ],\n "allele_type":"alt",\n "vrf":[\n 0.002369668246445498,\n 0.0024937655860349127,\n 0.0016129032258064516,\n 0.0025188916876574307,\n 0.0022935779816513763,\n 0.002008032128514056\n ],\n "vrf_stats":{\n "kurtosis":38.889891511122556,\n "max":0.0025188916876574307,\n "mean":5.4052190471990743e-05,\n "min":0.0,\n "nobs":246,\n "skewness":6.346664692283075,\n "stdev":0.0003461416264750575,\n "variance":1.1981402557879823e-07\n }\n }\n}\n\n')),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the JSON file, we're mainly interested in the following keys:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"variant")," (i.e. 
",(0,i.kt)("inlineCode",{parentName:"li"},"T:C"),")"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ad")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"vrf")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"nobs")," (number of observations)")),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Adjusting for null observations")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The ",(0,i.kt)("inlineCode",{parentName:"p"},"nobs")," value indicates how many observations were made. Ideally this would have been represented in the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," arrays, but it's left as an exercise for the reader."))),(0,i.kt)("h4",{id:"binning-vrf-data"},"Binning VRF Data"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," (variant read frequency) array in the JSON object above is paired with with the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," array (allele depths) shown above."),(0,i.kt)("p",null,"The data in the JSON object has a crazy number of significant digits. This means that as the number of samples increase, this array will grow. 
To make this more future-proof, Nirvana bins everything according to 0.1% increments."),(0,i.kt)("p",null,"With the binned data, we end up having 775 distinct ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143."),(0,i.kt)("h4",{id:"pre-processing-the-data"},"Pre-processing the Data"),(0,i.kt)("p",null,"The JSON file is converted into a small TSV file that is ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/blob/main/MitoHeteroplasmy/Resources/MitoHeteroplasmy.tsv.gz"},"embedded in Nirvana"),". Here is an example of the TSV file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS REF ALT VRF_BINS VRF_COUNTS\nchrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\nchrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\n")),(0,i.kt)("h4",{id:"algorithm"},"Algorithm"),(0,i.kt)("p",null,"Nirvana will calculate mitochondrial heteroplasmy data for every sample in the VCF. 
Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Percentiles")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana uses the ",(0,i.kt)("a",{parentName:"p",href:"https://en.wikipedia.org/wiki/Percentile"},"statistical definition of percentile")," (indicating the value below which a given percentage of observations in a group of observations falls). 
Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1)."))),(0,i.kt)("h2",{id:"download-url"},"Download URL"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unavailable")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The original data set is only available internally at Illumina at the moment."))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{14-17}","{14-17}":!0},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"one percentile for each variant frequency (each alternate allele)")))))}m.isMDXComponent=!0}}]); \ No newline at 
end of file diff --git a/assets/js/72cec9a0.6aba383a.js b/assets/js/72cec9a0.6aba383a.js deleted file mode 100644 index d04f9360..00000000 --- a/assets/js/72cec9a0.6aba383a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6446],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),c=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=c(e.components);return r.createElement(p.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,p=e.parentName,s=i(e,["components","mdxType","originalType","parentName"]),u=c(n),m=a,f=u["".concat(p,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=m;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[u]="string"==typeof e?e:a,l[1]=i;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 
0,i={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.18/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad-lof-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],c={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pLi"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pNull"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pRec"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability 
of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"synZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"misZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/73895ac4.066deb1e.js b/assets/js/73895ac4.066deb1e.js deleted file mode 100644 index 8adc4dfc..00000000 --- a/assets/js/73895ac4.066deb1e.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9143,9836],{3905:(e,n,t)=>{t.d(n,{Zo:()=>d,kt:()=>h});var a=t(67294);function r(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},d=function(e){var n=c(e.components);return 
a.createElement(l.Provider,{value:n},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var t=e.components,r=e.mdxType,o=e.originalType,l=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),u=c(t),m=r,h=u["".concat(l,".").concat(m)]||u[m]||p[m]||o;return t?a.createElement(h,i(i({ref:n},d),{},{components:t})):a.createElement(h,i({ref:n},d))}));function h(e,n){var t=arguments,r=n&&n.mdxType;if("string"==typeof e||r){var o=t.length,i=new Array(o);i[0]=m;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[u]="string"==typeof e?e:r,i[1]=s;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>s,toc:()=>l});var a=t(87462),r=(t(67294),t(3905));const o={},i=void 0,s={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.16/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.16",frontMatter:{}},l=[],c={toc:l},d="wrapper";function u(e){let{components:n,...t}=e;return(0,r.kt)(d,(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,r.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}u.isMDXComponent=!0},41867:(e,n,t)=>{t.r(n),t.d(n,{contentTitle:()=>s,default:()=>p,frontMatter:()=>i,metadata:()=>l,toc:()=>c});var a=t(87462),r=(t(67294),t(3905)),o=t(99679);const i={title:"Amino Acid Conservation"},s=void 0,l={unversionedId:"data-sources/amino-acid-conservation",id:"version-3.16/data-sources/amino-acid-conservation",title:"Amino Acid Conservation",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/amino-acid-conservation.mdx",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/amino-acid-conservation",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/amino-acid-conservation.mdx",tags:[],version:"3.16",frontMatter:{title:"Amino Acid Conservation"},sidebar:"version-3.16/docs",previous:{title:"1000 
Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/1000Genomes"},next:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"FASTA File",id:"fasta-file",children:[],level:2},{value:"Parsing FASTA",id:"parsing-fasta",children:[],level:2},{value:"Assigning scores to Nirvana transcripts",id:"assigning-scores-to-nirvana-transcripts",children:[{value:"GRCh37",id:"grch37",children:[],level:3},{value:"GRCh38",id:"grch38",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:c},u="wrapper";function p(e){let{components:n,...t}=e;return(0,r.kt)(u,(0,a.Z)({},d,t,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Amino acid conservation scores are obtained from multiple alignments of vertebrate exomes to the human ones. The score indicate the frequency with which a particular AA is observed in Humans."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. 
",(0,r.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. (",(0,r.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,r.kt)("h2",{id:"fasta-file"},"FASTA File"),(0,r.kt)("p",null,"The exon alignments are provided in FASTA files as follows:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},">ENST00000641515.2_hg38_1_2 3 0 0 chr1:65565-65573+\nMKK\n>ENST00000641515.2_panTro4_1_2 3 0 0 chrUn_GL393541:146907-146915+\nMKK\n>ENST00000641515.2_gorGor3_1_2 3 0 0\n---\n>ENST00000641515.2_ponAbe2_1_2 3 0 0 chr15:99141417-99141425-\nMKK\n>ENST00000641515.2_hg38_2_2 324 0 0 chr1:69037-70008+\nVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ\n>ENST00000641515.2_panTro4_2_2 324 0 0 chrUn_GL393541:151333-152303+\n")),(0,r.kt)("h2",{id:"parsing-fasta"},"Parsing FASTA"),(0,r.kt)("p",null,"For each Ensembl transcript, we will need to aggregate all the exons together for each of the 100 species. From there, we should get a full alignment that can be used to determine conservation. 
For example, for ENST00000641515.2 we have:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"Human (hg38) MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nChimp MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFL-MLFFVFYGGIVFGNLLIVRIVVSDSHLHSPMYFLLANLSLIDLSLCSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGorilla ----------------------------------------------------------------------------------------------------------------------\nOrangutan MKKVTAEAISWNESTSKTNNSVVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVIIVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGibbon ----------------------------------------------------------------------------------------------------------------------\nRhesus MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVVDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\nMacaque MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVIDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\n")),(0,r.kt)("p",null,"If we look at position 6, we see that humans have an Alanine (A) residue. This residue is shared by Chimp and Orangutan. However, Rhesus and Macaque have a Glutamic acid (E) residue at that position. Moreover, Gorilla and Gibbon don't even have data for that transcript.\nFor position 6, we would say that we have 43% conservation (3/7) since three organisms share the same residue as humans."),(0,r.kt)("h2",{id:"assigning-scores-to-nirvana-transcripts"},"Assigning scores to Nirvana transcripts"),(0,r.kt)("p",null,"The source FASTA file comes with Ensembl/UCSC transcript ids of the transcripts used for alignments. The Nirvana cache has RefSeq and Ensembl transcripts and our first attempt was to map the given Ensembl/UCSC ids to their equivalent RefSeq/Ensembl ids. This attempt was unsuccessful since UCSC Table Browser provided mapping without version numbers. 
So we proceeded as follows:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Take proteins which have a unique mapping (and hence one set of conservation scores). For ones that mapped to both ChrX and ChrY, we accepted the one from ChrX."),(0,r.kt)("li",{parentName:"ul"},"A Nirvana transcript having an exact peptide sequence match with a uniquely aligned protein is assigned the corresponding conservation scores.")),(0,r.kt)("p",null,"Unfortunately this left us with a very small number of transcripts having conservation scores."),(0,r.kt)("h3",{id:"grch37"},"GRCh37"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Source FASTA contained 41957 protein alignments."),(0,r.kt)("li",{parentName:"ul"},"38165 proteins had unique scores."),(0,r.kt)("li",{parentName:"ul"},"88 aligned proteins existed in Nirvana cache."),(0,r.kt)("li",{parentName:"ul"},"118 transcripts had conservation scores.")),(0,r.kt)("h3",{id:"grch38"},"GRCh38"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Source FASTA contained 110024 protein alignments."),(0,r.kt)("li",{parentName:"ul"},"88961 proteins had unique scores."),(0,r.kt)("li",{parentName:"ul"},"11688 aligned proteins existed in Nirvana cache."),(0,r.kt)("li",{parentName:"ul"},"12098 transcripts had conservation scores.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,"GRCh37: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,r.kt)("p",null,"GRCh38: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Conservation scores are reported in the transcript section. 
One score is reported for each alt allele"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7411046e.a39df010.js b/assets/js/7411046e.a39df010.js deleted file mode 100644 index 98f6d15f..00000000 --- a/assets/js/7411046e.a39df010.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9791],{3905:(n,e,t)=>{t.d(e,{Zo:()=>d,kt:()=>u});var a=t(67294);function i(n,e,t){return e in n?Object.defineProperty(n,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):n[e]=t,n}function o(n,e){var t=Object.keys(n);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(n);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(n,e).enumerable}))),t.push.apply(t,a)}return t}function r(n){for(var e=1;e=0||(i[t]=n[t]);return i}(n,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(n);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(n,t)&&(i[t]=n[t])}return i}var c=a.createContext({}),l=function(n){var e=a.useContext(c),t=e;return n&&(t="function"==typeof n?n(e):r(r({},e),n)),t},d=function(n){var e=l(n.components);return a.createElement(c.Provider,{value:e},n.children)},p="mdxType",g={inlineCode:"code",wrapper:function(n){var e=n.children;return a.createElement(a.Fragment,{},e)}},m=a.forwardRef((function(n,e){var t=n.components,i=n.mdxType,o=n.originalType,c=n.parentName,d=s(n,["components","mdxType","originalType","parentName"]),p=l(t),m=i,u=p["".concat(c,".").concat(m)]||p[m]||g[m]||o;return t?a.createElement(u,r(r({ref:e},d),{},{components:t})):a.createElement(u,r({ref:e},d))}));function u(n,e){var t=arguments,i=e&&e.mdxType;if("string"==typeof n||i){var o=t.length,r=new Array(o);r[0]=m;var s={};for(var c in e)hasOwnProperty.call(e,c)&&(s[c]=e[c]);s.originalType=n,s[p]="string"==typeof n?n:i,r[1]=s;for(var 
l=2;l{t.r(e),t.d(e,{contentTitle:()=>r,default:()=>p,frontMatter:()=>o,metadata:()=>s,toc:()=>c});var a=t(87462),i=(t(67294),t(3905));const o={title:"Parsing Nirvana JSON"},r=void 0,s={unversionedId:"introduction/parsing-json",id:"version-3.21/introduction/parsing-json",title:"Parsing Nirvana JSON",description:"Why JSON?",source:"@site/versioned_docs/version-3.21/introduction/parsing-json.md",sourceDirName:"introduction",slug:"/introduction/parsing-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/parsing-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/introduction/parsing-json.md",tags:[],version:"3.21",frontMatter:{title:"Parsing Nirvana JSON"},sidebar:"docs",previous:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/getting-started"},next:{title:"Annotating COVID-19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/covid19"}},c=[{value:"Why JSON?",id:"why-json",children:[{value:"What do other annotators use?",id:"what-do-other-annotators-use",children:[],level:3},{value:"What do we gain by using JSON?",id:"what-do-we-gain-by-using-json",children:[],level:3}],level:2},{value:"Parsing JSON",id:"parsing-json",children:[{value:"Organization",id:"organization",children:[],level:3},{value:"JASIX",id:"jasix",children:[],level:3}],level:2}],l={toc:c},d="wrapper";function p(n){let{components:e,...o}=n;return(0,i.kt)(d,(0,a.Z)({},l,o,{components:e,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"why-json"},"Why JSON?"),(0,i.kt)("p",null,"VCF is a fantastic file format that was developed during the methods development activities within the 1000 Genomes Project. Prior to that, variant callers were outputting information into a variety of tab-delimited formats. Sometimes based on existing standards (like GFF), while most were proprietary. 
The primary intent of VCF files was to provide a human-readable, standardized representation of genetic variants. Similar to SAM/BAM files, VCF files used BCF files as their binary counterpart."),(0,i.kt)("p",null,"In the very beginning, Nirvana offered VCF output for annotation. While many variant annotators offer an option to output VCF files, one could argue if they are still human-readable. Here's an example from a VCF file produced by VEP v102:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"chr3 107840527 . A ATTTTTTTTT,AT,ATTTTTTTT 153.51 PASS AN=6;MQ=244.10;\nSOR=1.739;QD=2.24;DP=57;AF=0.500,0.167,0.333;FS=0.000;AC=3,1,2;CSQ=TTTTTTTTT|\nintron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|\nTranscript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-132_622-124dup|||||||\nrs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||\n|||||||||0.792|-0.109757,T|intron_variant&non_coding_transcript_variant|MODIFIER|\nLINC00635|ENSG00000241469|Transcript|ENST00000608506.6|lncRNA||4/4|\nENST00000608506.6:n.622-124dup|||||||rs35564779||-1||HGNC|HGNC:27184|||5|||||||||\nEnsembl||||||||||||||||||||||||||||||||||||||||||||0.932|-0.075622,TTTTTTTT|\nintron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|\nTranscript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-131_622-124dup|||||||\nrs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||\n|||||||||0.808|-0.105490,TTTTTTTTT|intron_variant&non_coding_transcript_variant|\nMODIFIER|LINC00636|ENSG00000240423|Transcript|ENST00000649048.1|lncRNA||2/3|\nENST00000649048.1:n.179+5223_179+5231dup|||||||rs35564779||1||HGNC|HGNC:27702|||||||||\n|||Ensembl||||||||||||||||||||||||||||||||||||||||||||0.792|-0.109757, (etc.)\n")),(0,i.kt)("p",null,"Originally Nirvana used the same VCF notation as VEP uses above. 
The problem is that you end up with a large amount of text that is difficult to parse out by eye and requires the use of several delimiters to divide the information into useful segments. When we originally annotated this variant using VEP, ",(0,i.kt)("strong",{parentName:"p"},"this single variant used 488,909 bytes")," (almost \xbd MB). Surprisingly, we found that this broke some downstream tools that had preconceived notions of how long a single line could be in a VCF file."),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Whitespace is not allowed in the VCF INFO field. This means that if you wanted to express a gene description from OMIM: ",(0,i.kt)("strong",{parentName:"p"},'"HRAS PROTOONCOGENE, GTPase; HRAS"'),", you would need to replace the spaces with something else like an underline. You would also need to hope that the VCF parser correctly handles embedded commas and semicolons in the description."))),(0,i.kt)("h3",{id:"what-do-other-annotators-use"},"What do other annotators use?"),(0,i.kt)("p",null,"Unfortunately, file format standardization has not made it all the way to variant annotation yet. 
The ",(0,i.kt)("a",{parentName:"p",href:"https://ga4gh-gks.github.io/variant_annotation.html"},"GA4GH Annotation group")," had many discussions on the topic several years ago. While a set of JSON schemas were created in that effort, there wasn't enough momentum to make this a new standard."),(0,i.kt)("p",null,"While there is some overlap in general file formats (JSON vs VCF vs TSV), none of those are compatible with each other. I.e. the VCF representation in VEP and snpEff is different just like the JSON schemas used by VEP, Nirvana, and GA4GH are different."),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Source"),(0,i.kt)("th",{parentName:"tr",align:null},"Formats"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"VEP"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"),", TSV, VCF")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"snpEff"),(0,i.kt)("td",{parentName:"tr",align:null},"VCF")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"Annovar"),(0,i.kt)("td",{parentName:"tr",align:null},"TSV")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"Nirvana"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"GA4GH"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"))))),(0,i.kt)("p",null,"We are interested in working together with others in the annotation space to develop a common annotation file format. 
Our belief is that this would accelerate methods development and benchmarking activities within annotation much in the same way the creation of SAM/BAM & VCF/BCF accelerated secondary analysis development."),(0,i.kt)("h3",{id:"what-do-we-gain-by-using-json"},"What do we gain by using JSON?"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"JSON files are better at showing hierarchical and other relational data. For example when we output ClinVar data, we often want to output several overlapping RCV entries (variants coupled with a disease phenotype). In each, we would want to output a list of phenotypes, clinical significance, etc. That is difficult to accomplish in a human-readable way using VCF files (without resorting to growing lexicon of delimiters)."),(0,i.kt)("li",{parentName:"ul"},"JSON files use JavaScript data types, while VCF INFO fields don't directly have data types. Instead, external metadata located in the VCF header is required to indicated the preferred data type."),(0,i.kt)("li",{parentName:"ul"},"JSON files are more verbose. Often this is seen as a negative, but compression largely compensates for this. Given the following excerpt from the VCF example above ",(0,i.kt)("inlineCode",{parentName:"li"},"HGNC:27184|||5|||||||||Ensembl")," it's not immediately obvious what the ",(0,i.kt)("inlineCode",{parentName:"li"},"5")," refers to (without checking the VCF header for details). 
With JSON files, you would always see a key name associated with a value."),(0,i.kt)("li",{parentName:"ul"},"JSON files can be natively imported into different search and analytics solutions like Elasticsearch and Snowflake."),(0,i.kt)("li",{parentName:"ul"},"JSON strings do not have any limitations on the use of whitespace.")),(0,i.kt)("h2",{id:"parsing-json"},"Parsing JSON"),(0,i.kt)("p",null,"Our JSON files are organized similarly to original VCF variants:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(95421).Z})),(0,i.kt)("p",null,"Nirvana JSON files can get very large and sometimes we receive feedback that a bioinformatician tried to read the JSON file into Python or R resulting in a program that ran out of available RAM. This happens because those parsers try to load everything into memory all at once."),(0,i.kt)("p",null,"To get around those issues, we play some clever tricks with newlines that enables our users to parse our JSON files quickly and efficiently."),(0,i.kt)("h3",{id:"organization"},"Organization"),(0,i.kt)("p",null,"Our JSON file is arranged as follows:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the header section is located on the first line"),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a position (same as a row in a VCF file)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the genes section ",(0,i.kt)("inlineCode",{parentName:"li"},'],"genes":[')))),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a gene",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the end ",(0,i.kt)("inlineCode",{parentName:"li"},"]}"))))),(0,i.kt)("p",null,"Knowing this, you can load each position line as an independent JSON object and extract the information you need. 
"),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Jupyter Notebook")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"To demonstrate this, we have put together a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-python.ipynb"},"Jupyter notebook demonstrating how to do this in Python")," and a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-r.ipynb"},"R version")," as well."))),(0,i.kt)("h3",{id:"jasix"},"JASIX"),(0,i.kt)("p",null,"One of the tools that we really like in the VCF ecosystem is ",(0,i.kt)("a",{parentName:"p",href:"https://dx.doi.org/10.1093%2Fbioinformatics%2Fbtq671"},"tabix"),". Unfortunately, tabix only works for tab-delimited file formats. 
As a result, we created a similar tool for Nirvana JSON files called JASIX."),(0,i.kt)("p",null,"Here's an example of how you might use JASIX:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/net6.0/Jasix.dll -i dragen.json.gz -q chr1:942450-942455\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the Nirvana JSON path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-q")," argument specifies a genomic range ",(0,i.kt)("em",{parentName:"li"},"(you can use as many of these as you want)"))),(0,i.kt)("p",null,"JASIX also includes additional options for showing the Nirvana header or for extracting different sections (like the genes section)."),(0,i.kt)("p",null,"The output from JASIX is compliant JSON object shown in pretty-printed form:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{"positions":[\n{\n "chromosome": "chr1",\n "position": 942451,\n "refAllele": "T",\n "altAlleles": [\n "C"\n ],\n "quality": 484.23,\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "1p36.33",\n "samples": [\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 21,\n "genotypeQuality": 60,\n "alleleDepths": [\n 0,\n 21\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 32,\n "genotypeQuality": 93,\n "alleleDepths": [\n 0,\n 32\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 36,\n "genotypeQuality": 105,\n "alleleDepths": [\n 0,\n 36\n ]\n }\n ],\n "variants": [\n {\n "vid": "1-942451-T-C",\n "chromosome": "chr1",\n "begin": 942451,\n "end": 942451,\n "refAllele": "T",\n "altAllele": "C",\n "variantType": "SNV",\n "hgvsg": "NC_000001.11:g.942451T>C",\n "phylopScore": -0.1,\n "clinvar": [\n {\n "id": "VCV000836156.1",\n "reviewStatus": "criteria provided, single submitter",\n "significance": [\n 
"uncertain significance"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "lastUpdatedDate": "2020-08-20"\n },\n {\n "id": "RCV001037211.1",\n "variationId": 836156,\n "reviewStatus": "criteria provided, single submitter",\n "alleleOrigins": [\n "germline"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "phenotypes": [\n "not provided"\n ],\n "medGenIds": [\n "CN517202"\n ],\n "significance": [\n "uncertain significance"\n ],\n "lastUpdatedDate": "2020-08-20",\n "pubMedIds": [\n "28492532"\n ]\n }\n ],\n "dbsnp": [\n "rs6672356"\n ],\n "gnomad": {\n "coverage": 25,\n "allAf": 0.999855,\n "allAn": 123742,\n "allAc": 123724,\n "allHc": 61853,\n "afrAf": 0.999416,\n "afrAn": 10278,\n "afrAc": 10272,\n "afrHc": 5133,\n "amrAf": 0.99995,\n "amrAn": 20008,\n "amrAc": 20007,\n "amrHc": 10003,\n "easAf": 1,\n "easAn": 6054,\n "easAc": 6054,\n "easHc": 3027,\n "finAf": 1,\n "finAn": 8696,\n "finAc": 8696,\n "finHc": 4348,\n "nfeAf": 0.999899,\n "nfeAn": 49590,\n "nfeAc": 49585,\n "nfeHc": 24790,\n "asjAf": 1,\n "asjAn": 7208,\n "asjAc": 7208,\n "asjHc": 3604,\n "sasAf": 0.99967,\n "sasAn": 18160,\n "sasAc": 18154,\n "sasHc": 9074,\n "othAf": 1,\n "othAn": 3748,\n "othAc": 3748,\n "othHc": 1874,\n "maleAf": 0.9999,\n "maleAn": 69780,\n "maleAc": 69773,\n "maleHc": 34883,\n "femaleAf": 0.999796,\n "femaleAn": 53962,\n "femaleAc": 53951,\n "femaleHc": 26970,\n "controlsAllAf": 0.999815,\n "controlsAllAn": 48654,\n "controlsAllAc": 48645\n },\n "oneKg": {\n "allAf": 1,\n "afrAf": 1,\n "amrAf": 1,\n "easAf": 1,\n "eurAf": 1,\n "sasAf": 1,\n "allAn": 5008,\n "afrAn": 1322,\n "amrAn": 694,\n "easAn": 1008,\n "eurAn": 1006,\n "sasAn": 978,\n "allAc": 5008,\n "afrAc": 1322,\n "amrAc": 694,\n "easAc": 1008,\n "eurAc": 1006,\n "sasAc": 978\n },\n "primateAI": [\n {\n "hgnc": "SAMD11",\n "scorePercentile": 0.87\n }\n ],\n "revel": {\n "score": 0.145\n },\n "topmed": {\n "allAf": 0.999809,\n "allAn": 125568,\n "allAc": 125544,\n "allHc": 62760\n },\n "transcripts": [\n {\n "transcript": 
"ENST00000420190.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ],\n "proteinId": "ENSP00000411579.2"\n },\n {\n "transcript": "ENST00000342066.7",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000342066.7:c.1027T>C",\n "hgvsp": "ENSP00000342313.3:p.(Trp343Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000342313.3",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000618181.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "732",\n "cdsPos": "652",\n "exons": "7/11",\n "proteinPos": "218",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618181.4:c.652T>C",\n "hgvsp": "ENSP00000480870.1:p.(Trp218Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000480870.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000622503.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1030",\n "exons": "10/14",\n "proteinPos": "344",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000622503.4:c.1030T>C",\n "hgvsp": "ENSP00000482138.1:p.(Trp344Arg)",\n "isCanonical": true,\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482138.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000618323.4",\n "source": "Ensembl",\n "bioType": 
"protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "712",\n "cdsPos": "632",\n "exons": "8/12",\n "proteinPos": "211",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618323.4:c.632T>C",\n "hgvsp": "ENSP00000480678.1:p.(Leu211Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000480678.1",\n "siftScore": 0.03,\n "siftPrediction": "deleterious - low confidence"\n },\n {\n "transcript": "ENST00000616016.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "ccT/ccC",\n "aminoAcids": "P",\n "cdnaPos": "944",\n "cdsPos": "864",\n "exons": "9/13",\n "proteinPos": "288",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "ENST00000616016.4:c.864T>C",\n "hgvsp": "ENST00000616016.4:c.864T>C(p.(Pro288=))",\n "proteinId": "ENSP00000478421.1"\n },\n {\n "transcript": "ENST00000618779.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "921",\n "cdsPos": "841",\n "exons": "9/13",\n "proteinPos": "281",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618779.4:c.841T>C",\n "hgvsp": "ENSP00000484256.1:p.(Trp281Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000484256.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000616125.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "783",\n "cdsPos": "703",\n "exons": "8/12",\n "proteinPos": "235",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000616125.4:c.703T>C",\n "hgvsp": "ENSP00000484643.1:p.(Trp235Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": 
"ENSP00000484643.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000620200.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "427",\n "cdsPos": "347",\n "exons": "5/9",\n "proteinPos": "116",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000620200.4:c.347T>C",\n "hgvsp": "ENSP00000484820.1:p.(Leu116Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000484820.1",\n "siftScore": 0.16,\n "siftPrediction": "tolerated - low confidence"\n },\n {\n "transcript": "ENST00000617307.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "867",\n "cdsPos": "787",\n "exons": "9/13",\n "proteinPos": "263",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000617307.4:c.787T>C",\n "hgvsp": "ENSP00000482090.1:p.(Trp263Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482090.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "NM_152486.2",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "codons": "Cgg/Cgg",\n "aminoAcids": "R",\n "cdnaPos": "1107",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "148398",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "NM_152486.2:c.1027T>C",\n "hgvsp": "NM_152486.2:c.1027T>C(p.(Arg343=))",\n "isCanonical": true,\n "proteinId": "NP_689699.2"\n },\n {\n "transcript": "ENST00000341065.8",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "750",\n "cdsPos": "751",\n "exons": "8/12",\n "proteinPos": "251",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": 
"ENST00000341065.8:c.750T>C",\n "hgvsp": "ENSP00000349216.4:p.(Trp251Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000349216.4",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000455979.1",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "507",\n "cdsPos": "508",\n "exons": "4/7",\n "proteinPos": "170",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000455979.1:c.507T>C",\n "hgvsp": "ENSP00000412228.1:p.(Trp170Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000412228.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000478729.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000474461.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "389",\n "exons": "3/4",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000474461.1:n.389T>C"\n },\n {\n "transcript": "ENST00000466827.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "191",\n "exons": "2/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000466827.1:n.191T>C"\n },\n {\n "transcript": "ENST00000464948.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "286",\n "exons": "1/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000464948.1:n.286T>C"\n },\n {\n "transcript": "NM_015658.3",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "geneId": "26155",\n "hgnc": "NOC2L",\n 
"consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "NP_056473.2"\n },\n {\n "transcript": "ENST00000483767.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000327044.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000317992.6"\n },\n {\n "transcript": "ENST00000477976.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000496938.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n }\n ]\n }\n ]\n}\n]}\n')))}p.isMDXComponent=!0},95421:(n,e,t)=>{t.d(e,{Z:()=>a});const a=t.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/749d13f9.1e9f4f7b.js b/assets/js/749d13f9.1e9f4f7b.js deleted file mode 100644 index eb729381..00000000 --- a/assets/js/749d13f9.1e9f4f7b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2164],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),c=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=c(e.components);return r.createElement(p.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,p=e.parentName,s=i(e,["components","mdxType","originalType","parentName"]),u=c(n),m=a,f=u["".concat(p,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=m;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[u]="string"==typeof e?e:a,l[1]=i;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,i={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.14/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/gnomad-lof-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],c={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n 
"pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pLi"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pNull"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pRec"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"synZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"misZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git 
a/assets/js/75a3a2eb.9fbe623d.js b/assets/js/75a3a2eb.9fbe623d.js deleted file mode 100644 index c7283ff0..00000000 --- a/assets/js/75a3a2eb.9fbe623d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9767],{3905:(t,n,e)=>{e.d(n,{Zo:()=>d,kt:()=>N});var a=e(67294);function r(t,n,e){return n in t?Object.defineProperty(t,n,{value:e,enumerable:!0,configurable:!0,writable:!0}):t[n]=e,t}function l(t,n){var e=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable}))),e.push.apply(e,a)}return e}function i(t){for(var n=1;n=0||(r[e]=t[e]);return r}(t,n);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,e)&&(r[e]=t[e])}return r}var m=a.createContext({}),o=function(t){var n=a.useContext(m),e=n;return t&&(e="function"==typeof t?t(n):i(i({},n),t)),e},d=function(t){var n=o(t.components);return a.createElement(m.Provider,{value:n},t.children)},u="mdxType",g={inlineCode:"code",wrapper:function(t){var n=t.children;return a.createElement(a.Fragment,{},n)}},k=a.forwardRef((function(t,n){var e=t.components,r=t.mdxType,l=t.originalType,m=t.parentName,d=p(t,["components","mdxType","originalType","parentName"]),u=o(e),k=r,N=u["".concat(m,".").concat(k)]||u[k]||g[k]||l;return e?a.createElement(N,i(i({ref:n},d),{},{components:e})):a.createElement(N,i({ref:n},d))}));function N(t,n){var e=arguments,r=n&&n.mdxType;if("string"==typeof t||r){var l=e.length,i=new Array(l);i[0]=k;var p={};for(var m in n)hasOwnProperty.call(n,m)&&(p[m]=n[m]);p.originalType=t,p[u]="string"==typeof t?t:r,i[1]=p;for(var o=2;o{e.r(n),e.d(n,{contentTitle:()=>i,default:()=>u,frontMatter:()=>l,metadata:()=>p,toc:()=>m});var a=e(87462),r=(e(67294),e(3905));const l={title:"Transcript Consequence Impact"},i=void 
0,p={unversionedId:"core-functionality/transcript-consequence-impacts",id:"core-functionality/transcript-consequence-impacts",title:"Transcript Consequence Impact",description:"Overview",source:"@site/docs/core-functionality/transcript-consequence-impacts.md",sourceDirName:"core-functionality",slug:"/core-functionality/transcript-consequence-impacts",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/core-functionality/transcript-consequence-impacts.md",tags:[],version:"current",frontMatter:{title:"Transcript Consequence Impact"},sidebar:"docs",previous:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts"},next:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Sources",id:"sources",children:[],level:2},{value:"Consequence Impacts",id:"consequence-impacts",children:[{value:"Known Issues",id:"known-issues",children:[],level:3}],level:2},{value:"Example Transcript",id:"example-transcript",children:[],level:2}],o={toc:m},d="wrapper";function u(t){let{components:n,...e}=t;return(0,r.kt)(d,(0,a.Z)({},o,e,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Illumina Connected Annotations provides transcript consequence impacts from ",(0,r.kt)("a",{parentName:"p",href:"https://pcingola.github.io/SnpEff"},"SnpEff"),"."),(0,r.kt)("p",null,"Following definitions are used for the impact ratings as obtained from 
",(0,r.kt)("a",{parentName:"p",href:"https://github.com/pcingola/SnpEff/blob/master/src/docs/se_inputoutput.md#impact-prediction"},"SnpEff"),"."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Impact"),(0,r.kt)("th",{parentName:"tr",align:null},"Definition"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"The variant is assumed to have high (disruptive) impact in the protein, probably causing protein truncation, loss of function or triggering nonsense mediated decay.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"A non-disruptive variant that might change protein effectiveness.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"Assumed to be mostly harmless or unlikely to change protein behavior.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"Usually non-coding variants or variants affecting non-coding genes, where predictions are difficult or there is no evidence of impact.")))),(0,r.kt)("h2",{id:"sources"},"Sources"),(0,r.kt)("p",null,"Not all consequences are rated by SnpEff, therefore Illumina Connected Annotations combines the ratings from SnpEff with those from VEP."),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"SnpEff ",(0,r.kt)("a",{parentName:"li",href:"https://pcingola.github.io/SnpEff/se_inputoutput/"},"Documentation")," and ",(0,r.kt)("a",{parentName:"li",href:"https://github.com/pcingola/SnpEff/blob/001b947893b616e3af082e6c565e253eef59db98/src/main/java/org/snpeff/snpEffect/EffectType.java#L54"},"Codebase")),(0,r.kt)("li",{parentName:"ol"},"VEP 
",(0,r.kt)("a",{parentName:"li",href:"https://useast.ensembl.org/info/genome/variation/prediction/predicted_data.html"},"Documentation"))),(0,r.kt)("h2",{id:"consequence-impacts"},"Consequence Impacts"),(0,r.kt)("p",null,"Following table gives the combined rating for all consequences recognized by Illumina Connected Annotations."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Consequence"),(0,r.kt)("th",{parentName:"tr",align:null},"SnpEff Impact"),(0,r.kt)("th",{parentName:"tr",align:null},"VEP Impact"),(0,r.kt)("th",{parentName:"tr",align:null},"Illumina Connected Annotations Impact"),(0,r.kt)("th",{parentName:"tr",align:null},"Comment"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"bidirectional_gene_fusion"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coding_sequence_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"low, modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"Based on 
CDS")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_change"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_decrease"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_increase"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"downstream_gene_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"feature_elongation"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"feature_truncation"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"five_prime_duplicated_transcript"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"five_prime_UTR_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"frameshift_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"gene_fusion"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"incomplete_terminal_codon_variant"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"inframe_deletion"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"inframe_insertion"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"intron_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"mature_miRNA_variant"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"missense_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"NMD_transcript_variant"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"non_coding_transcript_exon_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"non_coding_transcript_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"protein_altering_variant"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"regulatory_region_ablation"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"regulatory_region_amplification"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"regulatory_region_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"short_tandem_repeat_change"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"short_tandem_repeat_contraction"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"short_tandem_repeat_expansion"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"splice_acceptor_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"splice_donor_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"splice_region_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate, low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"Based on SPLICE_SITE_REGION in 
SnpEff")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"start_lost"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"start_retained_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"stop_gained"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"stop_lost"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"stop_retained_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"synonymous_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"three_prime_duplicated_transcript"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"three_prime_UTR_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"transcript_ablation"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"transcript_amplification"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"transcript_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"unidirectional_gene_fusion"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"upstream_gene_variant"),(0,r.kt)("td",{parentNam
e:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Note: ")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("ol",{parentName:"div"},(0,r.kt)("li",{parentName:"ol"},"For transcripts with multiple consequences, the most severe impact rating is chosen."),(0,r.kt)("li",{parentName:"ol"},"In case of consequences that do not have any impact rating from SnpEff or VEP, Illumina Connected Annotations provides ",(0,r.kt)("inlineCode",{parentName:"li"},"modifier"),".")))),(0,r.kt)("h3",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known 
Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"The consequence ",(0,r.kt)("inlineCode",{parentName:"p"},"splice_polypyrimidine_tract_variant"),", is rated as ",(0,r.kt)("inlineCode",{parentName:"p"},"low")," by VEP.\nHowever, this consequence is not annotated by Illumina Connected Annotations, therefore the impact will also not be provided."))),(0,r.kt)("h2",{id:"example-transcript"},"Example Transcript"),(0,r.kt)("p",null,"The key ",(0,r.kt)("inlineCode",{parentName:"p"},"impact")," for each transcript gives the impact rating for the ",(0,r.kt)("inlineCode",{parentName:"p"},"consequence"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json",metastring:"{20-24}","{20-24}":!0},'{\n "variants": [\n {\n "vid": "1-1623412-T-C",\n "chromosome": "1",\n "begin": 1623412,\n "end": 1623412,\n "refAllele": "T",\n "altAllele": "C",\n "variantType": "SNV",\n "hgvsg": "NC_000001.11:g.1623412T>C",\n "transcripts": [\n {\n "transcript": "ENST00000479659.5",\n "source": "Ensembl",\n "bioType": "lncRNA",\n "introns": "2/18",\n "geneId": "ENSG00000197530",\n "hgnc": "MIB2",\n "consequence": [\n "intron_variant",\n "non_coding_transcript_variant"\n ],\n "impact": "modifier",\n "hgvsc": "ENST00000479659.5:n.288-19T>C"\n },\n {\n "transcript": "ENST00000489635.5",\n "source": "VEP",\n "bioType": "mRNA",\n "codons": "aTg/aCg",\n "aminoAcids": "M/T",\n "cdnaPos": "269",\n "cdsPos": "134",\n "exons": "3/20",\n "proteinPos": "45",\n "geneId": "ENSG00000197530",\n "hgnc": "MIB2",\n "consequence": [\n "missense_variant"\n ],\n "impact": "moderate",\n "hgvsc": "ENST00000489635.5:c.134T>C",\n "hgvsp": "ENSP00000426007.1:p.(Met45Thr)",\n "proteinId": "ENSP00000426007.1"\n }\n ]\n }\n ]\n}\n')))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/75a3a2eb.a6960e59.js b/assets/js/75a3a2eb.a6960e59.js new file mode 100644 index 00000000..da36a8bb --- /dev/null +++ 
b/assets/js/75a3a2eb.a6960e59.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9767],{3905:(t,n,e)=>{e.d(n,{Zo:()=>d,kt:()=>N});var a=e(7294);function r(t,n,e){return n in t?Object.defineProperty(t,n,{value:e,enumerable:!0,configurable:!0,writable:!0}):t[n]=e,t}function l(t,n){var e=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable}))),e.push.apply(e,a)}return e}function i(t){for(var n=1;n=0||(r[e]=t[e]);return r}(t,n);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,e)&&(r[e]=t[e])}return r}var m=a.createContext({}),o=function(t){var n=a.useContext(m),e=n;return t&&(e="function"==typeof t?t(n):i(i({},n),t)),e},d=function(t){var n=o(t.components);return a.createElement(m.Provider,{value:n},t.children)},u="mdxType",g={inlineCode:"code",wrapper:function(t){var n=t.children;return a.createElement(a.Fragment,{},n)}},k=a.forwardRef((function(t,n){var e=t.components,r=t.mdxType,l=t.originalType,m=t.parentName,d=p(t,["components","mdxType","originalType","parentName"]),u=o(e),k=r,N=u["".concat(m,".").concat(k)]||u[k]||g[k]||l;return e?a.createElement(N,i(i({ref:n},d),{},{components:e})):a.createElement(N,i({ref:n},d))}));function N(t,n){var e=arguments,r=n&&n.mdxType;if("string"==typeof t||r){var l=e.length,i=new Array(l);i[0]=k;var p={};for(var m in n)hasOwnProperty.call(n,m)&&(p[m]=n[m]);p.originalType=t,p[u]="string"==typeof t?t:r,i[1]=p;for(var o=2;o{e.r(n),e.d(n,{contentTitle:()=>i,default:()=>u,frontMatter:()=>l,metadata:()=>p,toc:()=>m});var a=e(7462),r=(e(7294),e(3905));const l={title:"Transcript Consequence Impact"},i=void 0,p={unversionedId:"core-functionality/transcript-consequence-impacts",id:"core-functionality/transcript-consequence-impacts",title:"Transcript Consequence 
Impact",description:"Overview",source:"@site/docs/core-functionality/transcript-consequence-impacts.md",sourceDirName:"core-functionality",slug:"/core-functionality/transcript-consequence-impacts",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/core-functionality/transcript-consequence-impacts.md",tags:[],version:"current",frontMatter:{title:"Transcript Consequence Impact"},sidebar:"docs",previous:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts"},next:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Sources",id:"sources",children:[],level:2},{value:"Consequence Impacts",id:"consequence-impacts",children:[{value:"Known Issues",id:"known-issues",children:[],level:3}],level:2},{value:"Example Transcript",id:"example-transcript",children:[],level:2}],o={toc:m},d="wrapper";function u(t){let{components:n,...e}=t;return(0,r.kt)(d,(0,a.Z)({},o,e,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Illumina Connected Annotations provides transcript consequence impacts from ",(0,r.kt)("a",{parentName:"p",href:"https://pcingola.github.io/SnpEff"},"SnpEff"),"."),(0,r.kt)("p",null,"Following definitions are used for the impact ratings as obtained from 
",(0,r.kt)("a",{parentName:"p",href:"https://github.com/pcingola/SnpEff/blob/master/src/docs/se_inputoutput.md#impact-prediction"},"SnpEff"),"."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Impact"),(0,r.kt)("th",{parentName:"tr",align:null},"Definition"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"The variant is assumed to have high (disruptive) impact in the protein, probably causing protein truncation, loss of function or triggering nonsense mediated decay.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"A non-disruptive variant that might change protein effectiveness.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"Assumed to be mostly harmless or unlikely to change protein behavior.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"Usually non-coding variants or variants affecting non-coding genes, where predictions are difficult or there is no evidence of impact.")))),(0,r.kt)("h2",{id:"sources"},"Sources"),(0,r.kt)("p",null,"Not all consequences are rated by SnpEff, therefore Illumina Connected Annotations combines the ratings from SnpEff with those from VEP."),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"SnpEff ",(0,r.kt)("a",{parentName:"li",href:"https://pcingola.github.io/SnpEff/se_inputoutput/"},"Documentation")," and ",(0,r.kt)("a",{parentName:"li",href:"https://github.com/pcingola/SnpEff/blob/001b947893b616e3af082e6c565e253eef59db98/src/main/java/org/snpeff/snpEffect/EffectType.java#L54"},"Codebase")),(0,r.kt)("li",{parentName:"ol"},"VEP 
",(0,r.kt)("a",{parentName:"li",href:"https://useast.ensembl.org/info/genome/variation/prediction/predicted_data.html"},"Documentation"))),(0,r.kt)("h2",{id:"consequence-impacts"},"Consequence Impacts"),(0,r.kt)("p",null,"Following table gives the combined rating for all consequences recognized by Illumina Connected Annotations."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Consequence"),(0,r.kt)("th",{parentName:"tr",align:null},"SnpEff Impact"),(0,r.kt)("th",{parentName:"tr",align:null},"VEP Impact"),(0,r.kt)("th",{parentName:"tr",align:null},"Illumina Connected Annotations Impact"),(0,r.kt)("th",{parentName:"tr",align:null},"Comment"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"bidirectional_gene_fusion"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coding_sequence_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"low, modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"Based on 
CDS")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_change"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_decrease"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_increase"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"downstream_gene_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"feature_elongation"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"feature_truncation"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"five_prime_duplicated_transcript"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"five_prime_UTR_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"frameshift_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"gene_fusion"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"incomplete_terminal_codon_variant"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"inframe_deletion"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"inframe_insertion"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"intron_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"mature_miRNA_variant"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"missense_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"NMD_transcript_variant"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"non_coding_transcript_exon_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"non_coding_transcript_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"protein_altering_variant"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"regulatory_region_ablation"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"regulatory_region_amplification"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"regulatory_region_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"short_tandem_repeat_change"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"short_tandem_repeat_contraction"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"short_tandem_repeat_expansion"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"splice_acceptor_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"splice_donor_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"splice_region_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"moderate, low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"Based on SPLICE_SITE_REGION in 
SnpEff")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"start_lost"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"start_retained_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"stop_gained"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"stop_lost"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"stop_retained_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"synonymous_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"low"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + 
VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"three_prime_duplicated_transcript"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"three_prime_UTR_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"transcript_ablation"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"transcript_amplification"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"VEP")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"transcript_variant"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"unidirectional_gene_fusion"),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null}),(0,r.kt)("td",{parentName:"tr",align:null},"high"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"upstream_gene_variant"),(0,r.kt)("td",{parentNam
e:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"modifier"),(0,r.kt)("td",{parentName:"tr",align:null},"SnpEff + VEP")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Note: ")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("ol",{parentName:"div"},(0,r.kt)("li",{parentName:"ol"},"For transcripts with multiple consequences, the most severe impact rating is chosen."),(0,r.kt)("li",{parentName:"ol"},"In case of consequences that do not have any impact rating from SnpEff or VEP, Illumina Connected Annotations provides ",(0,r.kt)("inlineCode",{parentName:"li"},"modifier"),".")))),(0,r.kt)("h3",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known 
Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"The consequence ",(0,r.kt)("inlineCode",{parentName:"p"},"splice_polypyrimidine_tract_variant"),", is rated as ",(0,r.kt)("inlineCode",{parentName:"p"},"low")," by VEP.\nHowever, this consequence is not annotated by Illumina Connected Annotations, therefore the impact will also not be provided."))),(0,r.kt)("h2",{id:"example-transcript"},"Example Transcript"),(0,r.kt)("p",null,"The key ",(0,r.kt)("inlineCode",{parentName:"p"},"impact")," for each transcript gives the impact rating for the ",(0,r.kt)("inlineCode",{parentName:"p"},"consequence"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json",metastring:"{20-24}","{20-24}":!0},'{\n "variants": [\n {\n "vid": "1-1623412-T-C",\n "chromosome": "1",\n "begin": 1623412,\n "end": 1623412,\n "refAllele": "T",\n "altAllele": "C",\n "variantType": "SNV",\n "hgvsg": "NC_000001.11:g.1623412T>C",\n "transcripts": [\n {\n "transcript": "ENST00000479659.5",\n "source": "Ensembl",\n "bioType": "lncRNA",\n "introns": "2/18",\n "geneId": "ENSG00000197530",\n "hgnc": "MIB2",\n "consequence": [\n "intron_variant",\n "non_coding_transcript_variant"\n ],\n "impact": "modifier",\n "hgvsc": "ENST00000479659.5:n.288-19T>C"\n },\n {\n "transcript": "ENST00000489635.5",\n "source": "VEP",\n "bioType": "mRNA",\n "codons": "aTg/aCg",\n "aminoAcids": "M/T",\n "cdnaPos": "269",\n "cdsPos": "134",\n "exons": "3/20",\n "proteinPos": "45",\n "geneId": "ENSG00000197530",\n "hgnc": "MIB2",\n "consequence": [\n "missense_variant"\n ],\n "impact": "moderate",\n "hgvsc": "ENST00000489635.5:c.134T>C",\n "hgvsp": "ENSP00000426007.1:p.(Met45Thr)",\n "proteinId": "ENSP00000426007.1"\n }\n ]\n }\n ]\n}\n')))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/75a881fd.9ba61c43.js b/assets/js/75a881fd.9ba61c43.js deleted file mode 100644 index 825402b3..00000000 --- 
a/assets/js/75a881fd.9ba61c43.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7616],{3905:(t,e,a)=>{a.d(e,{Zo:()=>p,kt:()=>g});var n=a(67294);function l(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function r(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(l[a]=t[a]);return l}(t,e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(l[a]=t[a])}return l}var s=n.createContext({}),m=function(t){var e=n.useContext(s),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},p=function(t){var e=m(t.components);return n.createElement(s.Provider,{value:e},t.children)},d="mdxType",k={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,l=t.mdxType,r=t.originalType,s=t.parentName,p=o(t,["components","mdxType","originalType","parentName"]),d=m(a),N=l,g=d["".concat(s,".").concat(N)]||d[N]||k[N]||r;return a?n.createElement(g,i(i({ref:e},p),{},{components:a})):n.createElement(g,i({ref:e},p))}));function g(t,e){var a=arguments,l=e&&e.mdxType;if("string"==typeof t||l){var r=a.length,i=new Array(r);i[0]=N;var o={};for(var s in e)hasOwnProperty.call(e,s)&&(o[s]=e[s]);o.originalType=t,o[d]="string"==typeof t?t:l,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var n=a(87462),l=(a(67294),a(3905));const r={title:"Custom Annotations"},i=void 0,o={unversionedId:"file-formats/custom-annotations",id:"version-3.14/file-formats/custom-annotations",title:"Custom 
Annotations",description:"Overview",source:"@site/versioned_docs/version-3.14/file-formats/custom-annotations.md",sourceDirName:"file-formats",slug:"/file-formats/custom-annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/file-formats/custom-annotations",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/file-formats/custom-annotations.md",tags:[],version:"3.14",frontMatter:{title:"Custom Annotations"},sidebar:"version-3.14/docs",previous:{title:"Nirvana JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/file-formats/nirvana-json-file-format"},next:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/core-functionality/variant-ids"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Variant File Format",id:"variant-file-format",children:[{value:"Basic Allele Frequency Example",id:"basic-allele-frequency-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv",children:[],level:4},{value:"Convert to Nirvana Format",id:"convert-to-nirvana-format",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results",children:[],level:4}],level:3},{value:"Categories & Descriptions Example",id:"categories--descriptions-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-1",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-1",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-1",children:[],level:4},{value:"Using Positional Matches",id:"using-positional-matches",children:[],level:4}],level:3},{value:"Genomic Region Example",id:"genomic-region-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-2",children:[],level:4},{value:"Annotate with 
Nirvana",id:"annotate-with-nirvana-2",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-2",children:[],level:4}],level:3},{value:"Mixing Small Variants and Genomic Regions",id:"mixing-small-variants-and-genomic-regions",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-3",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-3",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-3",children:[],level:4}],level:3}],level:2},{value:"Gene File Format",id:"gene-file-format",children:[{value:"Basic Gene Example",id:"basic-gene-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-4",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-4",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-4",children:[],level:4}],level:3}],level:2},{value:"Customizing the Header",id:"customizing-the-header",children:[{value:"Title",id:"title",children:[],level:3},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:3},{value:"Matching Criteria",id:"matching-criteria",children:[],level:3},{value:"Categories",id:"categories",children:[],level:3},{value:"Descriptions",id:"descriptions",children:[{value:"Populations",id:"populations",children:[],level:4}],level:3},{value:"Data Types",id:"data-types",children:[],level:3}],level:2},{value:"Using SAUtils",id:"using-sautils",children:[{value:"Convert Variant File",id:"convert-variant-file",children:[],level:3},{value:"Convert Gene File",id:"convert-gene-file",children:[],level:3}],level:2}],m={toc:s},p="wrapper";function d(t){let{components:e,...a}=t;return(0,l.kt)(p,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"While the team tries to keep data sources up-to-date, you might want to start incorporate new annotations ahead of our update cycle. 
Another\ncommon use case involves protected health information (PHI). Custom annotations are a mechanism that enables both use cases."),(0,l.kt)("p",null,"Here are some examples of how our collaborators use custom annotations:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"associating context from both a patient-level and a patient cohort level with the variant annotations"),(0,l.kt)("li",{parentName:"ul"},"adding content that is licensed (e.g. HGMD) to the variant annotations")),(0,l.kt)("p",null,"At the moment, we have two different custom annotation file formats. One provides additional annotations to variants (both small variants and SVs)\nwhile the other caters to gene annotations."),(0,l.kt)("p",null,"In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data."),(0,l.kt)("p",null,"The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how\nNirvana should match the variants."),(0,l.kt)("p",null,"At Illumina, there are usually many components downstream of Nirvana that have to parse our annotations. If a customer provides a custom\nannotation, those downstream tools need to understand more about the data such as:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"data type (e.g. number, boolean, or a string)"),(0,l.kt)("li",{parentName:"ul"},"data category (e.g. is this an allele count, allele number, allele frequency, etc.)"),(0,l.kt)("li",{parentName:"ul"},"associated population (i.e. if this is an allele frequency)")),(0,l.kt)("p",null,"For each custom annotation, Nirvana uses this context to create a ",(0,l.kt)("a",{parentName:"p",href:"https://json-schema.org/"},"JSON schema")," that can be sent to downstream tools. 
If\na tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of ","[0, 1]","."),(0,l.kt)("h2",{id:"variant-file-format"},"Variant File Format"),(0,l.kt)("h3",{id:"basic-allele-frequency-example"},"Basic Allele Frequency Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Imagine that you want to create a basic allele frequency custom annotation for small variants. If we visualized the tab-delimited file\n(TSV), it would look something like this:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{pare
ntName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291")))),(0,l.kt)("p",null,"Here's 
",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over the header and discuss the contents:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"title")," indicates the name of the JSON key"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"assembly")," indicates that this data is only valid for ",(0,l.kt)("inlineCode",{parentName:"li"},"GRCh38")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"matchVariantsBy")," indicates that we should only match the annotations if they are allele-specific"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"categories")," provides hints to downstream tools on how they might want to treat the data. In this case, we indicate that it's an allele\nfrequency."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"descriptions")," are used in special circumstances to provide more context. Even though column 5 is called ",(0,l.kt)("inlineCode",{parentName:"li"},"allAf"),", it might not be clear to a\ndownstream tool that this means a global allele frequency using all sub-populations. In this case, ",(0,l.kt)("inlineCode",{parentName:"li"},"ALL")," indicates the intended population."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"type")," indicates to downstream tools the data type. 
Since allele frequencies are numbers, we'll write ",(0,l.kt)("inlineCode",{parentName:"li"},"number")," in this column.")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Reference Base Checking")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Nirvana validates all the reference bases in a custom annotation. If a variant or genomic region is specified that has the wrong reference base, an error will be produced."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"The variants within each chromosome must be sorted by genomic position."))),(0,l.kt)("h4",{id:"convert-to-nirvana-format"},"Convert to Nirvana 
Format"),(0,l.kt)("p",null,"First we need to convert the TSV file to Nirvana's native file format and let's put that file in a new directory called CA:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"$ mkdir CA\n$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA\n---------------------------------------------------------------------------\nSAUtils (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nChromosome 16 completed in 00:00:00.1\nChromosome 19 completed in 00:00:00.0\n\nTime: 00:00:00.2\n")),(0,l.kt)("h4",{id:"annotate-with-nirvana"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's annotate the following VCF (notice that it's one of the variants that we have in our custom annotation):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 68801894 . G A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,"Since Nirvana can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to\nthe normal Nirvana command-line."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash",metastring:"{3}","{3}":!0},"$ dotnet bin/Release/netcoreapp2.1/Nirvana.dll -c Data/Cache/GRCh38/Both \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \\\n --sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA\n---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.8\nSA Position Scan 00:00:00.0 19\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr16 00:00:00.2 00:00:01.3 1\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:01.9 25.5 %\nPreload 00:00:00.2 3.3 %\nAnnotation 00:00:01.3 18.2 %\n\nTime: 00:00:06.3\n")),(0,l.kt)("h4",{id:"investigate-the-results"},"Investigate the Results"),(0,l.kt)("p",null,"We would expect the following data to show up in our JSON output file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-16}","{12-16}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n 
"altAllele": "A",\n "allAf": 7e-06\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"Nirvana preserves up to 6 decimal places for allele frequency data."),(0,l.kt)("h3",{id:"categories--descriptions-example"},"Categories & Descriptions Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-1"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Building on the previous example, we can add other types of annotations like predictions and general notes."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 6"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 
7"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,l.kt)("td",{parentName:"tr",align:"left"},"pathogenicity"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr"
,align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579"),(0,l.kt)("td",{parentName:"tr",align:"left"},"P"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569"),(0,l.kt)("td",{parentName:"tr",align:"left"},"LP"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in case 
123")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource2.tsv"},"the full TSV file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Placeholders")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). 
While\nNirvana also accepts empty columns in the TSV file, we use them in these examples to promote readability."))),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 6")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"pathogenicity")," which uses the ",(0,l.kt)("inlineCode",{parentName:"li"},"Prediction")," category. When using this category, Nirvana will\nvalidate to make\nsure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic)."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 7")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes")," and it doesn't have a category or description. We're just going to use it to add some internal\nnotes.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-1"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the\nalternate allele (allele-specific match):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 23603511 . TG T . . .\n16 68801894 . G A . . .\n19 11107436 . G C . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA2.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-1"},"Investigate the Results"),(0,l.kt)("p",null,"Because we specified ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," in our custom annotation file, only the middle variant will get an annotation:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-18}","{12-18}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123"\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA2.json.gz"},"the full JSON file"),"."),(0,l.kt)("h4",{id:"using-positional-matches"},"Using Positional Matches"),(0,l.kt)("p",null,"What would happen if we changed to ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position"),"? Two things will happen. 
First, our positional variants will now match:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-17}","{12-17}":!0},' "variants": [\n {\n "vid": "16-23603511-TG-T",\n "chromosome": "16",\n "begin": 23603512,\n "end": 23603512,\n "refAllele": "G",\n "altAllele": "-",\n "variantType": "deletion",\n "hgvsg": "NC_000016.10:g.23603512delG",\n "MyDataSource": [\n {\n "refAllele": "GA",\n "altAllele": "-",\n "allAf": 7e-06,\n "pathogenicity": "P"\n }\n ],\n "clinvar": [\n')),(0,l.kt)("p",null,"In addition, you will now see an extra flag for our allele-specific variant:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-20}","{12-20}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": [\n {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123",\n "isAlleleSpecific": true\n }\n ],\n "clinvar": [\n')),(0,l.kt)("h3",{id:"genomic-region-example"},"Genomic Region Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-2"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. 
Consider the following example:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0
,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"20000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"70000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Lots of false positives in this region")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource3.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes"),". In essence, it looks exactly like column 7 from our previous example."),(0,l.kt)("li",{parentName:"ul"},"The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.")),(0,l.kt)("p",null,"In the previous example we learned about positional matching vs allele-specific matching. 
For genomic regions, ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position")," produce\nthe same result."),(0,l.kt)("h4",{id:"annotate-with-nirvana-2"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use the same VCF file as our previous example."),(0,l.kt)("h4",{id:"investigate-the-results-2"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 23603511,\n "refAllele": "TG",\n "altAlleles": [\n "T"\n ],\n "cytogeneticBand": "16p12.2",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA3.json.gz"},"the full JSON file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Reciprocal & Annotation 
Overlap")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For all intervals, Nirvana internally calculates two overlaps: a ",(0,l.kt)("strong",{parentName:"p"},"variant overlap")," and an ",(0,l.kt)("strong",{parentName:"p"},"annotation overlap"),". Variant overlap is the percentage of the variant's length that is\noverlapped. Annotation overlap is the percentage of the annotation's length that is overlap. "),(0,l.kt)("p",{parentName:"div"},(0,l.kt)("strong",{parentName:"p"},"Reciprocal overlap")," is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is one 1 bp, both overlaps will be pretty close to 0."))),(0,l.kt)("p",null,"We will also see this annotation for the other variant on chr16:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 68801894,\n "refAllele": "G",\n "altAlleles": [\n "A"\n ],\n "cytogeneticBand": "16q22.1",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 
2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Targeting Structural Variants")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To\nforce Nirvana to match regions only to other SVs, use the ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=sv")," option in the header."))),(0,l.kt)("h3",{id:"mixing-small-variants-and-genomic-regions"},"Mixing Small Variants and Genomic Regions"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-3"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions. Let's create a file that contains both:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 
6"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt
)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval 
#1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"<","DEL",">"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval #2")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr22"),(0,l.kt)("td",{parentName:"tr",align:"left"},"12370388"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T[chr22:12370729["),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"Known false-positive")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource4.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 4")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"REF")," field. Exception for the case listed below, this is only used by small variants or translocation breakends."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"END")," field. This is only used by genomic regions."),(0,l.kt)("li",{parentName:"ul"},"There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? Interval #2 has ",(0,l.kt)("strong",{parentName:"li"},"a symbolic allele in the ALT column"),". When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). 
When Nirvana matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-3"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file to study how matching works for intervals #1 and #2:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n21 10510818 . C . . END=10699435;SVTYPE=DUP\n22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA3.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,'The first variant is similar to the custom annotation labelled "interval #2". Position 10510818 is the padding base, so it effectively starts at position 10510819.'),(0,l.kt)("h4",{id:"investigate-the-results-3"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-26}","{11-26}":!0},' "positions": [\n {\n "chromosome": "21",\n "position": 10510818,\n "svEnd": 10699435,\n "refAllele": "C",\n "altAlleles": [\n ""\n ],\n "cytogeneticBand": "21p11.2",\n "MyDataSource": [\n {\n "start": 10510818,\n "end": 10699435,\n "notes": "Interval #1",\n "reciprocalOverlap": 0.99999,\n "annotationOverlap": 0.99999\n },\n {\n "start": 10510819,\n "end": 10699435,\n "notes": "Interval #2",\n "reciprocalOverlap": 1,\n "annotationOverlap": 1\n }\n ],\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA4.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. 
Interval #1 technically starts 1 bp earlier, so its overlap 99.9%."),(0,l.kt)("p",null,"Further down the JSON file, we find the annotated translocation breakend:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-15}","{11-15}":!0},' "variants": [\n {\n "vid": "22-12370388-T-T[chr22:12370729[",\n "chromosome": "22",\n "begin": 12370388,\n "end": 12370388,\n "isStructuralVariant": true,\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "variantType": "translocation_breakend",\n "MyDataSource": {\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "notes": "Known false-positive"\n }\n }\n')),(0,l.kt)("h2",{id:"gene-file-format"},"Gene File Format"),(0,l.kt)("h3",{id:"basic-gene-example"},"Basic Gene Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-4"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions, however, sometimes we would like to add custom gene annotations. 
The gene custom annotation file format\nlooks slightly different:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#geneSymbol"),(0,l.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,l.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TP53"),(0,l.kt)("td",{parentName:"tr",align:"left"},"7157"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colorectal cancer, hereditary nonpolyposis, type 
5"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KRAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ENSG00000133703"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mismatch repair cancer syndrome"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in cohort 123")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource5.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 2")," has the ",(0,l.kt)("inlineCode",{parentName:"li"},"geneId")," field. This can be either an ",(0,l.kt)("strong",{parentName:"li"},"Entrez Gene ID")," or an ",(0,l.kt)("strong",{parentName:"li"},"Ensembl ID"),".")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Gene Symbols")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Nirvana uses the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneId")," to match genes rather than the gene symbol. 
However, to\nmake the custom annotation files easier to read, we've included the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneSymbol")," column as well."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unknown Gene IDs")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"When Nirvana parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Nirvana. In such a case, Nirvana will display an error showing all the\nunrecognized gene IDs."))),(0,l.kt)("h4",{id:"annotate-with-nirvana-4"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a VCF file that contain variants in TP53 and KRAS:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n12 25227255 . A T . . .\n17 7675074 . C A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA4.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-4"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{24-27}","{24-27}":!0},' "genes": [\n {\n "name": "KRAS",\n "clingenGeneValidity": [\n {\n "diseaseId": "MONDO_0009026",\n "disease": "Costello syndrome",\n "classification": "disputed",\n "classificationDate": "2018-07-24"\n }\n ],\n "clingenDosageSensitivityMap": {\n "haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"\n },\n "gnomAD": {\n "pLi": 0.000788,\n "pRec": 0.789,\n "pNull": 0.21,\n "synZ": 0.336,\n "misZ": 2.32,\n "loeuf": 1.24\n },\n "MyDataSource": {\n "phenotype": "Mismatch repair cancer syndrome",\n "notes": "Seen in cohort 123"\n }\n },\n')),(0,l.kt)("p",null,"This is the abbreviated output for KRAS. Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA5.json.gz"},"the full JSON file")," if you want to see the complete KRAS entry."),(0,l.kt)("h2",{id:"customizing-the-header"},"Customizing the Header"),(0,l.kt)("h3",{id:"title"},"Title"),(0,l.kt)("p",null,"For the title, you can provide any string that hasn't already been used. 
The title should be unique."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Make sure that the title does not conflict with other keys in the JSON file."))),(0,l.kt)("p",null,"For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"vid"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"transcripts"),", etc.. The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clinvar")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"gnomad"),"."),(0,l.kt)("p",null,"For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"svLength"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"cytogeneticBand"),", etc. 
The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clingen")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"dgv"),"."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Care should be taken not to annotate using multiple custom annotations that all use the same title."))),(0,l.kt)("h3",{id:"genome-assemblies"},"Genome Assemblies"),(0,l.kt)("p",null,"The following genome assemblies can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"GRCh37"),(0,l.kt)("li",{parentName:"ul"},"GRCh38")),(0,l.kt)("h3",{id:"matching-criteria"},"Matching Criteria"),(0,l.kt)("p",null,"The matching criteria instructs how Nirvana should match a VCF variant to the custom annotation."),(0,l.kt)("p",null,"The following matching criteria can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"allele")," - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"gnomAD")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"position")," - use this when you want positional matches. 
This is commonly used with disease phenotype data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"ClinVar")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"sv")," - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline\ncopy number intervals along the genome.")),(0,l.kt)("h3",{id:"categories"},"Categories"),(0,l.kt)("p",null,"Categories are not used by Nirvana, but are often used by downstream tools. Categories provide hints for how those tools should filter or display\nthe annotation data."),(0,l.kt)("p",null,"When a category is specified, Nirvana will provide additional validation for those fields. The following table describes each category:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Category"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Validation"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele counts for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleNumber"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele numbers for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele frequencies for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations 
below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ACMG-style pathogenicity classifications"),(0,l.kt)("td",{parentName:"tr",align:"left"},"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"benign")," (B)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely benign")," (LB)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"VUS"),(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely pathogenic")," (LP)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"pathogenic")," (P)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free text that signals downstream tools to add the column to the filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 20 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free-text description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 100 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Identifier"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any ID"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 50 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"HomozygousCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"count of homozygous individuals for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Score"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any score value"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Any double-precision floating point 
number")))),(0,l.kt)("h3",{id:"descriptions"},"Descriptions"),(0,l.kt)("p",null,"Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations."),(0,l.kt)("h4",{id:"populations"},"Populations"),(0,l.kt)("p",null,"The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Super-population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ACB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African Caribbeans in Barbados")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"All populations")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ad Mixed American")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASJ"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ashkenazi Jewish")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASW"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Americans of 
African Ancestry in SW USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"BEB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Bengali from Bangladesh")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CDX"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Chinese Dai in Xishuangbanna, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CEU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Utah Residents (CEPH) with Northern and Western European Ancestry")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Han Chinese in Beijing, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Southern Han Chinese")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CLM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colombians from Medellin, Colombia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"East Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ESN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Esan in 
Nigeria")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"FIN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Finnish in Finland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GBR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"British in England and Scotland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GIH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gujarati Indian from Houston, Texas")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GWD"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gambian in Western Divisions in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"IBS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Iberian population in Spain")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ITU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Indian Telugu from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"JPT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Japanese in Tokyo, Japan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KHV"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Kinh in Ho Chi Minh City, 
Vietnam")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"LWK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Luhya in Webuye, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MAG"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mandinka in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MKK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Maasai in Kinyawa, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MSL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mende in Sierra Leone")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MXL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mexican Ancestry from Los Angeles, USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"NFE"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European (Non-Finnish)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Other")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PEL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Peruvians from Lima, Peru")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PJL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Punjabi from Lahore, 
Pakistan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Puerto Ricans from Puerto Rico")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"South Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"STU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Sri Lankan Tamil from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TSI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Toscani in Italia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"YRI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Yoruba in Ibadan, Nigeria")))),(0,l.kt)("h3",{id:"data-types"},"Data Types"),(0,l.kt)("p",null,"Each custom annotation can be one of the following data types:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"bool")," - true or false"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"number")," - any integer or floating-point number"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"string")," - text")),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 
1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For boolean variables, only keys with a ",(0,l.kt)("inlineCode",{parentName:"p"},"true")," value will be output to the JSON object."))),(0,l.kt)("h2",{id:"using-sautils"},"Using SAUtils"),(0,l.kt)("p",null,"Nirvana includes a tool called ",(0,l.kt)("inlineCode",{parentName:"p"},"SAUtils")," that converts various data sources into Nirvana's native binary format. The sub-commands ",(0,l.kt)("inlineCode",{parentName:"p"},"customvar")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"customgene")," are used to specify a variant file or a gene file respectively."),(0,l.kt)("h3",{id:"convert-variant-file"},"Convert Variant File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,l.kt)("h3",{id:"convert-gene-file"},"Convert Gene File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll 
customgene \\\n --uga Nirvana_UGA.tsv \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"--uga")," argument specifies the Nirvana universal gene archive (UGA) path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7674fa56.431f5886.js b/assets/js/7674fa56.431f5886.js deleted file mode 100644 index 671205ce..00000000 --- a/assets/js/7674fa56.431f5886.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[975],{23982:n=>{n.exports={blogPosts:[]}}}]); \ No newline at end of file diff --git a/assets/js/7674fa56.d5213332.js b/assets/js/7674fa56.d5213332.js new file mode 100644 index 00000000..e156710e --- /dev/null +++ b/assets/js/7674fa56.d5213332.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[975],{3982:n=>{n.exports={blogPosts:[]}}}]); \ No newline at end of file diff --git a/assets/js/769a5422.1b17db64.js b/assets/js/769a5422.1b17db64.js deleted file mode 100644 index 7c53fb34..00000000 --- a/assets/js/769a5422.1b17db64.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2074],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var l=r.createContext({}),p=function(e){var t=r.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},s=function(e){var t=p(e.components);return r.createElement(l.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=a,f=u["".concat(l,".").concat(d)]||u[d]||m[d]||o;return n?r.createElement(f,i(i({ref:t},s),{},{components:n})):r.createElement(f,i({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=d;var c={};for(var l in t)hasOwnProperty.call(t,l)&&(c[l]=t[l]);c.originalType=e,c[u]="string"==typeof e?e:a,i[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/primate-ai-json",id:"version-3.17/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/primate-ai-json.md",tags:[],version:"3.17",frontMatter:{}},l=[],p={toc:l},s="wrapper";function 
u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/771fc413.6d433f08.js b/assets/js/771fc413.6d433f08.js deleted file mode 100644 index 18d4ae52..00000000 --- a/assets/js/771fc413.6d433f08.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1506],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>h});var i=t(67294);function a(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);n&&(i=i.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,i)}return t}function o(e){for(var n=1;n=0||(a[t]=e[t]);return a}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(a[t]=e[t])}return a}var s=i.createContext({}),c=function(e){var 
n=i.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var n=c(e.components);return i.createElement(s.Provider,{value:n},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return i.createElement(i.Fragment,{},n)}},d=i.forwardRef((function(e,n){var t=e.components,a=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=c(t),d=a,h=u["".concat(s,".").concat(d)]||u[d]||m[d]||r;return t?i.createElement(h,o(o({ref:n},p),{},{components:t})):i.createElement(h,o({ref:n},p))}));function h(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var r=t.length,o=new Array(r);o[0]=d;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[u]="string"==typeof e?e:a,o[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var i=t(87462),a=(t(67294),t(3905));const r={title:"Jasix"},o=void 0,l={unversionedId:"utilities/jasix",id:"version-3.21/utilities/jasix",title:"Jasix",description:"Overview",source:"@site/versioned_docs/version-3.21/utilities/jasix.mdx",sourceDirName:"utilities",slug:"/utilities/jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/utilities/jasix",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/utilities/jasix.mdx",tags:[],version:"3.21",frontMatter:{title:"Jasix"},sidebar:"docs",previous:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/variant-ids"},next:{title:"SAUtils",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/utilities/sautils"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Creating the Jasix index",id:"creating-the-jasix-index",children:[{value:"Example",id:"example",children:[],level:3}],level:2},{value:"Querying the index",id:"querying-the-index",children:[],level:2},{value:"Extracting a 
section",id:"extracting-a-section",children:[],level:2}],c={toc:s},p="wrapper";function u(e){let{components:n,...t}=e;return(0,a.kt)(p,(0,i.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,a.kt)("h2",{id:"overview"},"Overview"),(0,a.kt)("p",null,"The Jasix index is aimed at providing TABIX like indexing capabilities for the Nirvana JSON output."),(0,a.kt)("h2",{id:"creating-the-jasix-index"},"Creating the Jasix index"),(0,a.kt)("p",null,"The Jasix index (that comes in a .jsi) file is generated on-the-fly with Nirvana output. It can also be generated independently by running the Jasix command line utility on the JSON output file. Please note that the Jasix utility can only consume JSON files that follow the Nirvana JSON output format. The following code blocks demonstrate the help menu and index generating functionalities of Jasix."),(0,a.kt)("h3",{id:"example"},"Example"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -h\nUSAGE: dotnet Jasix.dll -i in.json.gz [options]\nIndexes a Nirvana annotated JSON file\n\nOPTIONS:\n --header, -t print also the header lines\n --only-header, -H print only the header lines\n --chromosomes, -l list chromosome names\n --index, -c create index\n --in, -i input\n --out, -o compressed output file name (default:console)\n --query, -q query range\n --section, -s complete section (positions or genes) to output\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll --index -i input.json.gz\n---------------------------------------------------------------------------\nJasix (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nRef Sequence chrM indexed in 00:00:00.2\nRef Sequence chr1 indexed in 00:00:05.8\nRef Sequence chr2 indexed in 00:00:06.0\n.\n.\n.\nPeak 
memory usage: 28.5 MB\nTime: 00:01:14.8\n")),(0,a.kt)("h2",{id:"querying-the-index"},"Querying the index"),(0,a.kt)("p",null,"The Jasix query format is chr:start-end. If not provided, it assumes end=start. If only chr is provided, all entries for that chromosome will be provided."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz chrM:5000-7000\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n }\n ]\n}\n\n')),(0,a.kt)("p",null,'The default output stream is Console. However, if an output filename is provided, Jasix outputs the results to that file in a bgzip compressed format. The output is always a valid JSON entry. If requested (via -t option) the header of the indexed file will be provided. 
Multiple queries can be submitted in the same command and the output will contain them within the same "positions" block in order of the submitted queries (Warning: if the queries are out of order, or overlapping, the output will be out or order and intersecting).'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -q chrM:5000-7000 -q chrM:8500-9500 -t\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"Illumina Annotation Engine 1.6.2.0",\n "creationTime":"2017-08-30 11:42:57",\n "genomeAssembly":"GRCh37",\n "schemaVersion":6,\n "dataVersion":"84.24.39",\n "dataSources":[\n {\n "name":"VEP",\n "version":"84",\n "description":"Ensembl",\n "releaseDate":"2017-01-16"\n }\n ],\n "samples":[\n "Mother"\n ]\n },\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":8702,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":0.9987,\n 
"totalDepth":1534,\n "genotypeQuality":1,\n "alleleDepths":[\n 2,\n 1532\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":8702,\n "chromosome":"chrM",\n "end":8702,\n "variantType":"SNV",\n "vid":"MT:8702:A"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":9378,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1018,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1018\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":9378,\n "chromosome":"chrM",\n "end":9378,\n "variantType":"SNV",\n "vid":"MT:9378:A"\n }\n ]\n }\n ]\n}\n')),(0,a.kt)("h2",{id:"extracting-a-section"},"Extracting a section"),(0,a.kt)("p",null,"The Nirvana JSON file has three sections: header, positions and genes. Header can be printed using the -H option. If you are interested in only the positions or genes section, you can use the -s or --section option."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -s genes\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'[\n{\n "name": "ABCB10",\n "omim": [\n {\n "mimNumber": 605454,\n "geneName": "ATP-binding cassette, subfamily B, member 10"\n }\n ]\n},\n{\n "name": "ABCD3",\n "omim": [\n {\n "mimNumber": 170995,\n "geneName": "ATP-binding cassette, subfamily D, member 3 (peroxisomal membrane protein 1, 70kD)",\n "description": "The ABCD3 gene encodes a peroxisomal membrane transporter involved in the transport of branched-chain fatty acids and C27 bile acids into the peroxisome; the latter function is a crucial step in bile acid biosynthesis (summary by Ferdinandusse et al., 2015).",\n "phenotypes": [\n {\n "mimNumber": 616278,\n "phenotype": "?Bile acid synthesis defect, congenital, 5",\n "mapping": "molecular basis of the disorder is 
known",\n "inheritances": [\n "Autosomal recessive"\n ],\n "comments": [\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n ]\n}\n]\n')))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/771fd362.4fa9676e.js b/assets/js/771fd362.4fa9676e.js new file mode 100644 index 00000000..e4f7f9fa --- /dev/null +++ b/assets/js/771fd362.4fa9676e.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7850,12,829,7870],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>g});var a=n(7294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,l=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),c=d(n),m=i,g=c["".concat(s,".").concat(m)]||c[m]||u[m]||l;return n?a.createElement(g,r(r({ref:t},p),{},{components:n})):a.createElement(g,r({ref:t},p))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var l=n.length,r=new Array(l);r[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[c]="string"==typeof e?e:i,r[1]=o;for(var 
d=2;d{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(7462),i=(n(7294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-dosage-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 
1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}c.isMDXComponent=!0},949:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(7462),i=(n(7294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-gene-validity-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},p="wrapper";function 
c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,i.kt)("td",{parentName:"tr",align:null},"object"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"disease"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"disease label")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classification"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see below for possible 
values")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"classification")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no reported evidence"),(0,i.kt)("li",{parentName:"ul"},"disputed"),(0,i.kt)("li",{parentName:"ul"},"limited"),(0,i.kt)("li",{parentName:"ul"},"moderate"),(0,i.kt)("li",{parentName:"ul"},"definitive"),(0,i.kt)("li",{parentName:"ul"},"strong"),(0,i.kt)("li",{parentName:"ul"},"refuted"),(0,i.kt)("li",{parentName:"ul"},"no known disease relationship")))}c.isMDXComponent=!0},4674:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(7462),i=(n(7294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-json",id:"data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n 
"end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingen"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"variantType"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"id"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. 
Alternatively a VID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"validated"),(0,i.kt)("td",{parentName:"tr",align:null},"boolean"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")))}c.isMDXComponent=!0},599:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>d,default:()=>g,frontMatter:()=>s,metadata:()=>p,toc:()=>c});var a=n(7462),i=(n(7294),n(3905)),l=n(4674),r=n(7356),o=n(949);const s={title:"ClinGen"},d=void 0,p={unversionedId:"data-sources/clingen",id:"data-sources/clingen",title:"ClinGen",description:"Overview",source:"@site/docs/data-sources/clingen.mdx",sourceDirName:"data-sources",slug:"/data-sources/clingen",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen.mdx",tags:[],version:"current",frontMatter:{title:"ClinGen"},sidebar:"docs",previous:{title:"Cancer Hotspots",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cancer-hotspots"},next:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"ISCA Regions",id:"isca-regions",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Status levels",id:"status-levels",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"Conflict Resolution",id:"conflict-resolution",children:[{value:"Clinical significance priority",id:"clinical-significance-priority",children:[],level:3},{value:"Validation 
Priority",id:"validation-priority",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2},{value:"Dosage Sensitivity Map",id:"dosage-sensitivity-map",children:[{value:"TSV Source files",id:"tsv-source-files",children:[],level:3},{value:"Dosage Rating System",id:"dosage-rating-system",children:[],level:3},{value:"Download URL",id:"download-url-1",children:[],level:3},{value:"JSON Output",id:"json-output-1",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[],level:3}],level:2},{value:"Gene-Disease Validity",id:"gene-disease-validity",children:[{value:"Source TSV",id:"source-tsv",children:[],level:3},{value:"Download URL",id:"download-url-2",children:[],level:3},{value:"Conflict Resolution",id:"conflict-resolution-1",children:[{value:"Multiple Classifications",id:"multiple-classifications",children:[],level:4},{value:"Multiple Dates",id:"multiple-dates",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output-2",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files-1",children:[],level:3}],level:2}],u={toc:c},m="wrapper";function g(e){let{components:t,...n}=e;return(0,i.kt)(m,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinGen is a National Institutes of Health (NIH)-funded resource dedicated to building a central resource that defines the clinical relevance of genes and variants for use in precision medicine and research."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 
16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Heidi L. Rehm, Ph.D., Jonathan S. Berg, M.D., Ph.D., Lisa D. Brooks, Ph.D., Carlos D. Bustamante, Ph.D., James P. Evans, M.D., Ph.D., Melissa J. Landrum, Ph.D., David H. Ledbetter, Ph.D., Donna R. Maglott, Ph.D., Christa Lese Martin, Ph.D., Robert L. Nussbaum, M.D., Sharon E. Plon, M.D., Ph.D., Erin M. Ramos, Ph.D., Stephen T. Sherry, Ph.D., and Michael S. Watson, Ph.D., for ClinGen. ",(0,i.kt)("strong",{parentName:"p"},"ClinGen The Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"N Engl J Med 2015; 372:2235-2242 June 4, 2015 DOI: 10.1056/NEJMsr1406261.")))),(0,i.kt)("h2",{id:"isca-regions"},"ISCA Regions"),(0,i.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,i.kt)("p",null,"ClinGen contains only copy number variation variants, since the coordinates in ClinGen original file follow the same rule as BED format, the coordinates had to be adjusted to ","[BEGIN+1, END]","."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#bin chrom chromStart chromEnd name score strand thickStart thickEnd attrCount attrTags attrVals\nnsv530705 1 564405 8597804 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530706 1 564424 3262790 0 1 copy_number_loss pathogenic False Abnormal facial shape,Abnormality of cardiac morphology,Global developmental delay,Muscular hypotonia HP:0001252,HP:0001263,HP:0001627,HP:0001999,MedGen:CN001147,MedGen:CN001157,MedGen:CN001482,MedGen:CN001810\nnsv530707 1 564424 7068738 0 1 copy_number_loss pathogenic False Abnormality of cardiac morphology,Cleft upper lip,Failure to 
thrive,Global developmental delay,Intrauterine growth retardation,Microcephaly,Short stature HP:0000204,HP:0000252,HP:0001263,HP:0001508,HP:0001511,HP:0001627,HP:0004322,MedGen:C0349588,MedGen:C1845868,MedGen:C1853481,MedGen:C2364119,MedGen:CN000197,MedGen:CN001157,MedGen:CN001482\nnsv533512 1 564435 649748 0 1 copy_number_loss benign False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv931338 1 714078 4958499 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530300 1 728138 5066371 1 0 copy_number_gain pathogenic False Abnormality of cardiac morphology,Cleft palate,Global developmental delay HP:0000175,HP:0001263,HP:0001627,MedGen:C2240378,MedGen:CN001157,MedGen:CN001482\n")),(0,i.kt)("h4",{id:"status-levels"},"Status levels"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"We parse the ClinGen tsv file and extract the following:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"chrom"),(0,i.kt)("li",{parentName:"ul"},"chromStart (note this a 0-based coordinate)"),(0,i.kt)("li",{parentName:"ul"},"chromEnd"),(0,i.kt)("li",{parentName:"ul"},"attrTags"),(0,i.kt)("li",{parentName:"ul"},"attrVals")),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," are comma separated lists. 
",(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," contains the field keys and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," contains the field values. We will parse the following keys from the two fields:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"parent (this will be used as the ID in our JSON output)"),(0,i.kt)("li",{parentName:"ul"},"clinical_int"),(0,i.kt)("li",{parentName:"ul"},"validated"),(0,i.kt)("li",{parentName:"ul"},"phenotype (this should be a string array)"),(0,i.kt)("li",{parentName:"ul"},"phenotype_id (this should be a string array)")),(0,i.kt)("p",null,"Observed losses and observed gains will be calculated from entries that share a common parent ID."),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"variants with a common parent ID and same coordinates are grouped",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"calculated observed losses, observed gains for each group"),(0,i.kt)("li",{parentName:"ul"},"Clinical significance and validation status are collapsed using the priority strategy described below"))),(0,i.kt)("li",{parentName:"ul"},"Variants with the same parent ID can have different coordinates (mapped to hg38)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"nsv491508 : chr14:105583663-106881350 and chr14:105605043-106766076 (only one example)"),(0,i.kt)("li",{parentName:"ul"},"we kept both variants")))),(0,i.kt)("h2",{id:"conflict-resolution"},"Conflict Resolution"),(0,i.kt)("h3",{id:"clinical-significance-priority"},"Clinical significance priority"),(0,i.kt)("p",null,"When there are a mixture of variants belonging to the same parent ID, we will choose the most pathogenic clinical significance from the available values. i.e. 
if 3 samples were deemed pathogenic and 2 samples were likely pathogenic, we would list the variant as pathogenic."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Priority")," (high to low)"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Priority"),(0,i.kt)("li",{parentName:"ul"},"Pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Benign"),(0,i.kt)("li",{parentName:"ul"},"Likely benign"),(0,i.kt)("li",{parentName:"ul"},"Uncertain significance")),(0,i.kt)("h3",{id:"validation-priority"},"Validation Priority"),(0,i.kt)("p",null,"When there are a mixture of variants belonging to same parent ID, we will set the validation status to true if any of the variants were validated."),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite"},"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite")),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(l.default,{mdxType:"CLINGENJSON"}),(0,i.kt)("h2",{id:"dosage-sensitivity-map"},"Dosage Sensitivity Map"),(0,i.kt)("p",null,"The Clinical Genome Resource (ClinGen) consortium is curating genes and regions of the genome to assess whether there is evidence to support that these genes/regions are dosage sensitive and should be targeted on a cytogenomic array. 
Illumina Connected Annotations reports these annotations for overlapping SVs."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Riggs ER, Nelson T, Merz A, Ackley T, Bunke B, Collins CD, Collinson MN, Fan YS, Goodenberger ML, Golden DM, Haglund-Hazy L, Krgovic D, Lamb AN, Lewis Z, Li G, Liu Y, Meck J, Neufeld-Kaiser W, Runke CK, Sanmann JN, Stavropoulos DJ, Strong E, Su M, Tayeh MK, Kokalj Vokac N, Thorland EC, Andersen E, Martin CL. ",(0,i.kt)("strong",{parentName:"p"},"Copy number variant discrepancy resolution using the ClinGen dosage sensitivity map results in updated clinical interpretations in ClinVar.")," ",(0,i.kt)("em",{parentName:"p"},"Hum Mutat. 2018 Nov;39(11):1650-1659. doi: 10.1002/humu.23610. 
PMID: 30095202; PMCID: PMC7374944.")))),(0,i.kt)("h3",{id:"tsv-source-files"},"TSV Source files"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Regions")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Region Curation Results\n#07 May,2019\n#Genomic Locations are reported on GRCh38 (hg38): GCF_000001405.36\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_region.cgi?id=key\n#ISCA ID ISCA Region Name cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nISCA-46299 Xp11.22 region (includes HUWE1) Xp11.22 tbd 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 22840365 20655035 26692240 2018-11-19\nISCA-46295 15q13.3 recurrent region (D-CHRNA7 to BP5) (includes CHRNA7 and OTUD7A) 15q13.3 chr15:31727418-32153204 3 Sufficient evidence for dosage pathogenicity 19898479 20236110 22775350 40 Dosage sensitivity unlikely 26968334 22420048 2018-05-10\nISCA-46291 7q11.23 recurrent distal region (includes HIP1, YWHAG) 7q11.23 chr7:75528718-76433859 2 Some evidence for dosage pathogenicity 21109226 16971481 1 Little evidence for dosage pathogenicity 21109226 27867344 2018-12-31\nISCA-46290 Xp11.22p11.23 recurrent region (includes SHROOM4) Xp11.22-p11.23 chrX: 48447780-52444264 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 19716111 21418194 25425167 2017-12-14 300801\n")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Genes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Gene Curation Results\n#24 May,2019\n#Genomic Locations are reported on GRCh37 
(hg19): GCF_000001405.13\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_gene.cgi?sym=Gene Symbol\n#Gene Symbol Gene ID cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nA4GALT 53947 22q13.2 chr22:43088121-43117307 30 Gene associated with autosomal recessive phenotype 0 No evidence available 2014-12-11 111400\nAAGAB 79719 15q23 chr15:67493013-67547536 3 Sufficient evidence for dosage pathogenicity 23064416 23000146 0 No evidence available 2013-02-28 148600\n")),(0,i.kt)("h3",{id:"dosage-rating-system"},"Dosage Rating System"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Rating"),(0,i.kt)("th",{parentName:"tr",align:null},"Possible Clinical Interpretation"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"0"),(0,i.kt)("td",{parentName:"tr",align:null},"No evidence to suggest that dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"1"),(0,i.kt)("td",{parentName:"tr",align:null},"Little evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"2"),(0,i.kt)("td",{parentName:"tr",align:null},"Emerging evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"3"),(0,i.kt)("td",{parentName:"tr",align:null},"Sufficient evidence suggesting 
dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"30"),(0,i.kt)("td",{parentName:"tr",align:null},"Gene associated with autosomal recessive phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"40"),(0,i.kt)("td",{parentName:"tr",align:null},"Dosage sensitivity unlikely")))),(0,i.kt)("p",null,"Reference: ",(0,i.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml"},"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml")),(0,i.kt)("h3",{id:"download-url-1"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.clinicalgenome.org/"},"ftp://ftp.clinicalgenome.org/")),(0,i.kt)("h3",{id:"json-output-1"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"ClinGenDosageJson"}),(0,i.kt)("h3",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The gene dosage sensitivity ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," for Illumina Connected Annotations can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DosageSensitivity")," subcommand. 
The required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"ClinGen_gene_curation_list_{ASSEMBLY}.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen Dosage Sensitivity Map\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll DosageSensitivity\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll dosagesensitivity [options]\nCreates a gene annotation database from dbVar data\n\nOPTIONS:\n --tsv, -t input tsv file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll DosageSensitivity --out SupplementaryDatabase/64/GRCh37 --tsv ClinGen_gene_curation_list_GRCh37.tsv\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\n\nTime: 00:00:00.1\n")),(0,i.kt)("p",null,"For building the ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," files, we use the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DosageMapRegions")," subcommand. 
The required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"ClinGen_region_curation_list_{ASSEMBLY}.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen Dosage Sensitivity Map\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll DosageMapRegions\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll dosagemapregions [options]\nCreates an interval annotation database from dbVar data\n\nOPTIONS:\n --tsv, -t input tsv file\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll DosageMapRegions --out SupplementaryDatabase/64/GRCh37 --ref References/7/Homo_sapiens.GRCh37.Nirvana.dat --tsv ClinGen_region_curation_list_GRCh37.tsv\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nWriting 505 intervals to database...\n\nTime: 00:00:00.1\n")),(0,i.kt)("h2",{id:"gene-disease-validity"},"Gene-Disease Validity"),(0,i.kt)("p",null,"The ClinGen Gene-Disease Clinical Validity curation process involves evaluating the strength of evidence supporting or refuting a claim that variation in a particular gene causes a particular disease. 
Illumina Connected Annotations reports these annotations for genes in the genes section of the JSON."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Strande NT, Riggs ER, Buchanan AH, et al. ",(0,i.kt)("strong",{parentName:"p"},"Evaluating the Clinical Validity of Gene-Disease Associations: An Evidence-Based Framework Developed by the Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"Am J Hum Genet. 2017;100(6):895-906. 
doi:10.1016/j.ajhg.2017.04.015")))),(0,i.kt)("h3",{id:"source-tsv"},"Source TSV"),(0,i.kt)("p",null,"The source data comes in a CSV file that we convert to a TSV."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"CLINGEN GENE VALIDITY CURATIONS\nFILE CREATED: 2019-05-28\nWEBPAGE: https://search.clinicalgenome.org/kb/gene-validity\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nGENE SYMBOL,GENE ID (HGNC),DISEASE LABEL,DISEASE ID (MONDO),SOP,CLASSIFICATION,ONLINE REPORT,CLASSIFICATION DATE\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nA2ML1,HGNC:23336,Noonan syndrome with multiple lentigines,MONDO_0007893,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/59b87033-dd91-4f1e-aec1-c9b1f5124b16--2018-06-07T14:37:47,2018-06-07T14:37:47.175Z\nA2ML1,HGNC:23336,cardiofaciocutaneous syndrome,MONDO_0015280,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/fc3c41d8-8497-489b-a350-c9e30016bc6a--2018-06-07T14:31:03,2018-06-07T14:31:03.696Z\nA2ML1,HGNC:23336,Costello syndrome,MONDO_0009026,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/ea72ba8d-cf62-44bc-86be-da64e3848eba--2018-06-07T14:34:05,2018-06-07T14:34:05.324Z\n")),(0,i.kt)("h3",{id:"download-url-2"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://search.clinicalgenome.org/kb/downloads#section_gene-disease-validity"},"https://search.clinicalgenome.org/kb/downloads#section_gene-disease-validity")),(0,i.kt)("h3",{id:"conflict-resolution-1"},"Conflict Resolution"),(0,i.kt)("h4",{id:"multiple-classifications"},"Multiple Classifications"),(0,i.kt)("p",null,"Here is an example of multiple classifications."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0010192 
ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep EDNRB\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Moderate,https://search.clinicalgenome.org/kb/gene-validity/d7abbd45-7915-437b-849b-dea876bfc2f5--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Limited,https://search.clinicalgenome.org/kb/gene-validity/73ee9727-60c1-40fd-830f-08c2b513d2ee--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\n")),(0,i.kt)("p",null,"In such cases, we select the more severe classification."),(0,i.kt)("h4",{id:"multiple-dates"},"Multiple Dates"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0016419 ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep MUTYH\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9904,2017-05-24T00:00:00\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9902,2017-05-25T00:00:00\n")),(0,i.kt)("p",null,"If the classifications are the same, we should select the latest classification date."),(0,i.kt)("h3",{id:"json-output-2"},"JSON Output"),(0,i.kt)(o.default,{mdxType:"ClinGenGeneValidity"}),(0,i.kt)("h3",{id:"building-the-supplementary-files-1"},"Building the supplementary files"),(0,i.kt)("p",null,"The gene disease validity ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," for Illumina Connected Annotations can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DiseaseValidity")," subcommand. 
The only required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"Clingen-Gene-Disease-Summary-2021-12-01.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen disease validity curations\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Disease validity curations from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"}," dotnet SAUtils.dll DiseaseValidity\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll diseasevalidity [options]\nCreates a gene annotation database from ClinGen gene validity data\n\nOPTIONS:\n --csv, -i ClinGen gene validity file path\n --cache, -c \n input cache directory\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll DiseaseValidity --tsv Clingen-Gene-Disease-Summary-2021-12-01.tsv \\\\\n--uga Cache --out SupplementaryDatabase\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nNumber of geneIds missing from the cache:0 (0%)\n\nTime: 00:00:00.2\n")))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/771fd362.d8330a1d.js b/assets/js/771fd362.d8330a1d.js deleted file mode 100644 index 22be3bc7..00000000 --- a/assets/js/771fd362.d8330a1d.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7850,12,829,7870],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>g});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,l=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),c=d(n),m=i,g=c["".concat(s,".").concat(m)]||c[m]||u[m]||l;return n?a.createElement(g,r(r({ref:t},p),{},{components:n})):a.createElement(g,r({ref:t},p))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var l=n.length,r=new Array(l);r[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[c]="string"==typeof e?e:i,r[1]=o;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-dosage-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object 
array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}c.isMDXComponent=!0},80949:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-gene-validity-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n 
"classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,i.kt)("td",{parentName:"tr",align:null},"object"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"disease"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"disease label")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classification"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"classification")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no reported 
evidence"),(0,i.kt)("li",{parentName:"ul"},"disputed"),(0,i.kt)("li",{parentName:"ul"},"limited"),(0,i.kt)("li",{parentName:"ul"},"moderate"),(0,i.kt)("li",{parentName:"ul"},"definitive"),(0,i.kt)("li",{parentName:"ul"},"strong"),(0,i.kt)("li",{parentName:"ul"},"refuted"),(0,i.kt)("li",{parentName:"ul"},"no known disease relationship")))}c.isMDXComponent=!0},44674:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-json",id:"data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n 
}\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingen"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"variantType"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"id"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. 
Alternatively a VID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"validated"),(0,i.kt)("td",{parentName:"tr",align:null},"boolean"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")))}c.isMDXComponent=!0},40599:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>d,default:()=>g,frontMatter:()=>s,metadata:()=>p,toc:()=>c});var a=n(87462),i=(n(67294),n(3905)),l=n(44674),r=n(37356),o=n(80949);const s={title:"ClinGen"},d=void 0,p={unversionedId:"data-sources/clingen",id:"data-sources/clingen",title:"ClinGen",description:"Overview",source:"@site/docs/data-sources/clingen.mdx",sourceDirName:"data-sources",slug:"/data-sources/clingen",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen.mdx",tags:[],version:"current",frontMatter:{title:"ClinGen"},sidebar:"docs",previous:{title:"Cancer Hotspots",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cancer-hotspots"},next:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"ISCA Regions",id:"isca-regions",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Status levels",id:"status-levels",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"Conflict Resolution",id:"conflict-resolution",children:[{value:"Clinical significance 
priority",id:"clinical-significance-priority",children:[],level:3},{value:"Validation Priority",id:"validation-priority",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2},{value:"Dosage Sensitivity Map",id:"dosage-sensitivity-map",children:[{value:"TSV Source files",id:"tsv-source-files",children:[],level:3},{value:"Dosage Rating System",id:"dosage-rating-system",children:[],level:3},{value:"Download URL",id:"download-url-1",children:[],level:3},{value:"JSON Output",id:"json-output-1",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[],level:3}],level:2},{value:"Gene-Disease Validity",id:"gene-disease-validity",children:[{value:"Source TSV",id:"source-tsv",children:[],level:3},{value:"Download URL",id:"download-url-2",children:[],level:3},{value:"Conflict Resolution",id:"conflict-resolution-1",children:[{value:"Multiple Classifications",id:"multiple-classifications",children:[],level:4},{value:"Multiple Dates",id:"multiple-dates",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output-2",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files-1",children:[],level:3}],level:2}],u={toc:c},m="wrapper";function g(e){let{components:t,...n}=e;return(0,i.kt)(m,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinGen is a National Institutes of Health (NIH)-funded resource dedicated to building a central resource that defines the clinical relevance of genes and variants for use in precision medicine and research."),(0,i.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Heidi L. Rehm, Ph.D., Jonathan S. Berg, M.D., Ph.D., Lisa D. Brooks, Ph.D., Carlos D. Bustamante, Ph.D., James P. Evans, M.D., Ph.D., Melissa J. Landrum, Ph.D., David H. Ledbetter, Ph.D., Donna R. Maglott, Ph.D., Christa Lese Martin, Ph.D., Robert L. Nussbaum, M.D., Sharon E. Plon, M.D., Ph.D., Erin M. Ramos, Ph.D., Stephen T. Sherry, Ph.D., and Michael S. Watson, Ph.D., for ClinGen. 
",(0,i.kt)("strong",{parentName:"p"},"ClinGen The Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"N Engl J Med 2015; 372:2235-2242 June 4, 2015 DOI: 10.1056/NEJMsr1406261.")))),(0,i.kt)("h2",{id:"isca-regions"},"ISCA Regions"),(0,i.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,i.kt)("p",null,"ClinGen contains only copy number variation variants, since the coordinates in ClinGen original file follow the same rule as BED format, the coordinates had to be adjusted to ","[BEGIN+1, END]","."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#bin chrom chromStart chromEnd name score strand thickStart thickEnd attrCount attrTags attrVals\nnsv530705 1 564405 8597804 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530706 1 564424 3262790 0 1 copy_number_loss pathogenic False Abnormal facial shape,Abnormality of cardiac morphology,Global developmental delay,Muscular hypotonia HP:0001252,HP:0001263,HP:0001627,HP:0001999,MedGen:CN001147,MedGen:CN001157,MedGen:CN001482,MedGen:CN001810\nnsv530707 1 564424 7068738 0 1 copy_number_loss pathogenic False Abnormality of cardiac morphology,Cleft upper lip,Failure to thrive,Global developmental delay,Intrauterine growth retardation,Microcephaly,Short stature HP:0000204,HP:0000252,HP:0001263,HP:0001508,HP:0001511,HP:0001627,HP:0004322,MedGen:C0349588,MedGen:C1845868,MedGen:C1853481,MedGen:C2364119,MedGen:CN000197,MedGen:CN001157,MedGen:CN001482\nnsv533512 1 564435 649748 0 1 copy_number_loss benign False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv931338 1 714078 4958499 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530300 1 728138 5066371 1 0 copy_number_gain pathogenic False Abnormality of cardiac morphology,Cleft palate,Global developmental delay 
HP:0000175,HP:0001263,HP:0001627,MedGen:C2240378,MedGen:CN001157,MedGen:CN001482\n")),(0,i.kt)("h4",{id:"status-levels"},"Status levels"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"We parse the ClinGen tsv file and extract the following:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"chrom"),(0,i.kt)("li",{parentName:"ul"},"chromStart (note this a 0-based coordinate)"),(0,i.kt)("li",{parentName:"ul"},"chromEnd"),(0,i.kt)("li",{parentName:"ul"},"attrTags"),(0,i.kt)("li",{parentName:"ul"},"attrVals")),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," are comma separated lists. ",(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," contains the field keys and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," contains the field values. 
We will parse the following keys from the two fields:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"parent (this will be used as the ID in our JSON output)"),(0,i.kt)("li",{parentName:"ul"},"clinical_int"),(0,i.kt)("li",{parentName:"ul"},"validated"),(0,i.kt)("li",{parentName:"ul"},"phenotype (this should be a string array)"),(0,i.kt)("li",{parentName:"ul"},"phenotype_id (this should be a string array)")),(0,i.kt)("p",null,"Observed losses and observed gains will be calculated from entries that share a common parent ID."),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"variants with a common parent ID and same coordinates are grouped",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"calculated observed losses, observed gains for each group"),(0,i.kt)("li",{parentName:"ul"},"Clinical significance and validation status are collapsed using the priority strategy described below"))),(0,i.kt)("li",{parentName:"ul"},"Variants with the same parent ID can have different coordinates (mapped to hg38)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"nsv491508 : chr14:105583663-106881350 and chr14:105605043-106766076 (only one example)"),(0,i.kt)("li",{parentName:"ul"},"we kept both variants")))),(0,i.kt)("h2",{id:"conflict-resolution"},"Conflict Resolution"),(0,i.kt)("h3",{id:"clinical-significance-priority"},"Clinical significance priority"),(0,i.kt)("p",null,"When there are a mixture of variants belonging to the same parent ID, we will choose the most pathogenic clinical significance from the available values. i.e. 
if 3 samples were deemed pathogenic and 2 samples were likely pathogenic, we would list the variant as pathogenic."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Priority")," (high to low)"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Priority"),(0,i.kt)("li",{parentName:"ul"},"Pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Benign"),(0,i.kt)("li",{parentName:"ul"},"Likely benign"),(0,i.kt)("li",{parentName:"ul"},"Uncertain significance")),(0,i.kt)("h3",{id:"validation-priority"},"Validation Priority"),(0,i.kt)("p",null,"When there are a mixture of variants belonging to same parent ID, we will set the validation status to true if any of the variants were validated."),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite"},"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite")),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(l.default,{mdxType:"CLINGENJSON"}),(0,i.kt)("h2",{id:"dosage-sensitivity-map"},"Dosage Sensitivity Map"),(0,i.kt)("p",null,"The Clinical Genome Resource (ClinGen) consortium is curating genes and regions of the genome to assess whether there is evidence to support that these genes/regions are dosage sensitive and should be targeted on a cytogenomic array. 
Illumina Connected Annotations reports these annotations for overlapping SVs."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Riggs ER, Nelson T, Merz A, Ackley T, Bunke B, Collins CD, Collinson MN, Fan YS, Goodenberger ML, Golden DM, Haglund-Hazy L, Krgovic D, Lamb AN, Lewis Z, Li G, Liu Y, Meck J, Neufeld-Kaiser W, Runke CK, Sanmann JN, Stavropoulos DJ, Strong E, Su M, Tayeh MK, Kokalj Vokac N, Thorland EC, Andersen E, Martin CL. ",(0,i.kt)("strong",{parentName:"p"},"Copy number variant discrepancy resolution using the ClinGen dosage sensitivity map results in updated clinical interpretations in ClinVar.")," ",(0,i.kt)("em",{parentName:"p"},"Hum Mutat. 2018 Nov;39(11):1650-1659. doi: 10.1002/humu.23610. 
PMID: 30095202; PMCID: PMC7374944.")))),(0,i.kt)("h3",{id:"tsv-source-files"},"TSV Source files"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Regions")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Region Curation Results\n#07 May,2019\n#Genomic Locations are reported on GRCh38 (hg38): GCF_000001405.36\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_region.cgi?id=key\n#ISCA ID ISCA Region Name cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nISCA-46299 Xp11.22 region (includes HUWE1) Xp11.22 tbd 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 22840365 20655035 26692240 2018-11-19\nISCA-46295 15q13.3 recurrent region (D-CHRNA7 to BP5) (includes CHRNA7 and OTUD7A) 15q13.3 chr15:31727418-32153204 3 Sufficient evidence for dosage pathogenicity 19898479 20236110 22775350 40 Dosage sensitivity unlikely 26968334 22420048 2018-05-10\nISCA-46291 7q11.23 recurrent distal region (includes HIP1, YWHAG) 7q11.23 chr7:75528718-76433859 2 Some evidence for dosage pathogenicity 21109226 16971481 1 Little evidence for dosage pathogenicity 21109226 27867344 2018-12-31\nISCA-46290 Xp11.22p11.23 recurrent region (includes SHROOM4) Xp11.22-p11.23 chrX: 48447780-52444264 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 19716111 21418194 25425167 2017-12-14 300801\n")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Genes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Gene Curation Results\n#24 May,2019\n#Genomic Locations are reported on GRCh37 
(hg19): GCF_000001405.13\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_gene.cgi?sym=Gene Symbol\n#Gene Symbol Gene ID cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nA4GALT 53947 22q13.2 chr22:43088121-43117307 30 Gene associated with autosomal recessive phenotype 0 No evidence available 2014-12-11 111400\nAAGAB 79719 15q23 chr15:67493013-67547536 3 Sufficient evidence for dosage pathogenicity 23064416 23000146 0 No evidence available 2013-02-28 148600\n")),(0,i.kt)("h3",{id:"dosage-rating-system"},"Dosage Rating System"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Rating"),(0,i.kt)("th",{parentName:"tr",align:null},"Possible Clinical Interpretation"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"0"),(0,i.kt)("td",{parentName:"tr",align:null},"No evidence to suggest that dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"1"),(0,i.kt)("td",{parentName:"tr",align:null},"Little evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"2"),(0,i.kt)("td",{parentName:"tr",align:null},"Emerging evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"3"),(0,i.kt)("td",{parentName:"tr",align:null},"Sufficient evidence suggesting 
dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"30"),(0,i.kt)("td",{parentName:"tr",align:null},"Gene associated with autosomal recessive phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"40"),(0,i.kt)("td",{parentName:"tr",align:null},"Dosage sensitivity unlikely")))),(0,i.kt)("p",null,"Reference: ",(0,i.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml"},"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml")),(0,i.kt)("h3",{id:"download-url-1"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.clinicalgenome.org/"},"ftp://ftp.clinicalgenome.org/")),(0,i.kt)("h3",{id:"json-output-1"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"ClinGenDosageJson"}),(0,i.kt)("h3",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The gene dosage sensitivity ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," for Illumina Connected Annotations can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DosageSensitivity")," subcommand. 
The required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"ClinGen_gene_curation_list_{ASSEMBLY}.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen Dosage Sensitivity Map\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll DosageSensitivity\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll dosagesensitivity [options]\nCreates a gene annotation database from dbVar data\n\nOPTIONS:\n --tsv, -t input tsv file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll DosageSensitivity --out SupplementaryDatabase/64/GRCh37 --tsv ClinGen_gene_curation_list_GRCh37.tsv\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\n\nTime: 00:00:00.1\n")),(0,i.kt)("p",null,"For building the ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," files, we use the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DosageMapRegions")," subcommand. 
The required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"ClinGen_region_curation_list_{ASSEMBLY}.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen Dosage Sensitivity Map\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll DosageMapRegions\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll dosagemapregions [options]\nCreates an interval annotation database from dbVar data\n\nOPTIONS:\n --tsv, -t input tsv file\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll DosageMapRegions --out SupplementaryDatabase/64/GRCh37 --ref References/7/Homo_sapiens.GRCh37.Nirvana.dat --tsv ClinGen_region_curation_list_GRCh37.tsv\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nWriting 505 intervals to database...\n\nTime: 00:00:00.1\n")),(0,i.kt)("h2",{id:"gene-disease-validity"},"Gene-Disease Validity"),(0,i.kt)("p",null,"The ClinGen Gene-Disease Clinical Validity curation process involves evaluating the strength of evidence supporting or refuting a claim that variation in a particular gene causes a particular disease. 
Illumina Connected Annotations reports these annotations for genes in the genes section of the JSON."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Strande NT, Riggs ER, Buchanan AH, et al. ",(0,i.kt)("strong",{parentName:"p"},"Evaluating the Clinical Validity of Gene-Disease Associations: An Evidence-Based Framework Developed by the Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"Am J Hum Genet. 2017;100(6):895-906. 
doi:10.1016/j.ajhg.2017.04.015")))),(0,i.kt)("h3",{id:"source-tsv"},"Source TSV"),(0,i.kt)("p",null,"The source data comes in a CSV file that we convert to a TSV."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"CLINGEN GENE VALIDITY CURATIONS\nFILE CREATED: 2019-05-28\nWEBPAGE: https://search.clinicalgenome.org/kb/gene-validity\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nGENE SYMBOL,GENE ID (HGNC),DISEASE LABEL,DISEASE ID (MONDO),SOP,CLASSIFICATION,ONLINE REPORT,CLASSIFICATION DATE\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nA2ML1,HGNC:23336,Noonan syndrome with multiple lentigines,MONDO_0007893,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/59b87033-dd91-4f1e-aec1-c9b1f5124b16--2018-06-07T14:37:47,2018-06-07T14:37:47.175Z\nA2ML1,HGNC:23336,cardiofaciocutaneous syndrome,MONDO_0015280,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/fc3c41d8-8497-489b-a350-c9e30016bc6a--2018-06-07T14:31:03,2018-06-07T14:31:03.696Z\nA2ML1,HGNC:23336,Costello syndrome,MONDO_0009026,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/ea72ba8d-cf62-44bc-86be-da64e3848eba--2018-06-07T14:34:05,2018-06-07T14:34:05.324Z\n")),(0,i.kt)("h3",{id:"download-url-2"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://search.clinicalgenome.org/kb/downloads#section_gene-disease-validity"},"https://search.clinicalgenome.org/kb/downloads#section_gene-disease-validity")),(0,i.kt)("h3",{id:"conflict-resolution-1"},"Conflict Resolution"),(0,i.kt)("h4",{id:"multiple-classifications"},"Multiple Classifications"),(0,i.kt)("p",null,"Here is an example of multiple classifications."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0010192 
ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep EDNRB\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Moderate,https://search.clinicalgenome.org/kb/gene-validity/d7abbd45-7915-437b-849b-dea876bfc2f5--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Limited,https://search.clinicalgenome.org/kb/gene-validity/73ee9727-60c1-40fd-830f-08c2b513d2ee--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\n")),(0,i.kt)("p",null,"In such cases, we select the more severe classification."),(0,i.kt)("h4",{id:"multiple-dates"},"Multiple Dates"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0016419 ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep MUTYH\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9904,2017-05-24T00:00:00\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9902,2017-05-25T00:00:00\n")),(0,i.kt)("p",null,"If the classifications are the same, we should select the latest classification date."),(0,i.kt)("h3",{id:"json-output-2"},"JSON Output"),(0,i.kt)(o.default,{mdxType:"ClinGenGeneValidity"}),(0,i.kt)("h3",{id:"building-the-supplementary-files-1"},"Building the supplementary files"),(0,i.kt)("p",null,"The gene disease validity ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," for Illumina Connected Annotations can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DiseaseValidity")," subcommand. 
The only required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"Clingen-Gene-Disease-Summary-2021-12-01.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen disease validity curations\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Disease validity curations from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"}," dotnet SAUtils.dll DiseaseValidity\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll diseasevalidity [options]\nCreates a gene annotation database from ClinGen gene validity data\n\nOPTIONS:\n --csv, -i ClinGen gene validity file path\n --cache, -c \n input cache directory\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll DiseaseValidity --tsv Clingen-Gene-Disease-Summary-2021-12-01.tsv \\\\\n--uga Cache --out SupplementaryDatabase\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nNumber of geneIds missing from the cache:0 (0%)\n\nTime: 00:00:00.2\n")))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/77207806.d6ff37d5.js b/assets/js/77207806.d6ff37d5.js deleted file mode 100644 index 270a8f94..00000000 --- a/assets/js/77207806.d6ff37d5.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9236,3759],{3905:(t,e,a)=>{a.d(e,{Zo:()=>d,kt:()=>g});var n=a(67294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),m=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},d=function(t){var e=m(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",c={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,d=o(t,["components","mdxType","originalType","parentName"]),s=m(a),N=r,g=s["".concat(p,".").concat(N)]||s[N]||c[N]||l;return a?n.createElement(g,i(i({ref:e},d),{},{components:a})):n.createElement(g,i({ref:e},d))}));function g(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=N;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[s]="string"==typeof t?t:r,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/fusioncatcher-json",id:"version-3.21/data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/fusioncatcher-json.md",tags:[],version:"3.21",frontMatter:{}},p=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA oesophageal carcinomas"\n ]\n }\n ]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known germline data sources")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data sources")))),(0,r.kt)("h4",{id:"genes"},"genes"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"first"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"second"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are paralogs for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a readthrough event (both are on the same strand and there are no genes between 
them)")))),(0,r.kt)("h4",{id:"gene"},"gene"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}s.isMDXComponent=!0},15206:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>i,metadata:()=>p,toc:()=>m});var n=a(87462),r=(a(67294),a(3905)),l=a(47980);const i={title:"FusionCatcher"},o=void 0,p={unversionedId:"data-sources/fusioncatcher",id:"version-3.21/data-sources/fusioncatcher",title:"FusionCatcher",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/fusioncatcher.mdx",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/fusioncatcher",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/fusioncatcher.mdx",tags:[],version:"3.21",frontMatter:{title:"FusionCatcher"},sidebar:"docs",previous:{title:"DECIPHER",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/decipher"},next:{title:"GERP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gerp"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Supported Data 
Sources",id:"supported-data-sources",children:[{value:"Oncogenes",id:"oncogenes",children:[],level:3},{value:"Germline",id:"germline",children:[],level:3},{value:"Somatic",id:"somatic",children:[],level:3}],level:2},{value:"Gene Pair TSV File",id:"gene-pair-tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Gene TSV File",id:"gene-tsv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:m},s="wrapper";function c(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://github.com/ndaniel/fusioncatcher"},"FusionCatcher")," is a well-known tool that searches for somatic novel/known fusion genes, translocations, and/or chimeras in RNA-seq data. 
While FusionCatcher itself is not part of Nirvana, we have included a subset of their genomic databases in Nirvana."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Daniel Nicorici, Mihaela \u015eatalan, Henrik Edgren, Sara Kangaspeska, Astrid Murum\xe4gi, Olli Kallioniemi, Sami Virtanen, Olavi Kilkku. (2014) ",(0,r.kt)("a",{parentName:"p",href:"https://www.biorxiv.org/content/10.1101/011650v1"},"FusionCatcher \u2013 a tool for finding somatic fusion genes in paired-end RNA-sequencing data"),". 
",(0,r.kt)("em",{parentName:"p"},"bioRxiv")," 011650"))),(0,r.kt)("h2",{id:"supported-data-sources"},"Supported Data Sources"),(0,r.kt)("h3",{id:"oncogenes"},"Oncogenes"),(0,r.kt)("p",null,"The following data sources are aggregated and used to populate the ",(0,r.kt)("inlineCode",{parentName:"p"},"isOncogene")," field in the gene JSON object:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bushman"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.bushmanlab.org/links/genelists"},"bushmanlab.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cancer_genes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ONGENE"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S1673852716302053"},"JGG")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://ongene.bioinfo-minzhao.org"},"bioinfo-minzhao.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"oncogenes_more.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"UniProt tumor 
genes"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/49/D1/D480/6006196"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.uniprot.org/downloads"},"uniprot.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tumor_genes.txt")))),(0,r.kt)("h3",{id:"germline"},"Germline"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Nirvana label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"1000 Genomes Project"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0104567"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"1000genomes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy (strong support)"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"banned.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Illumina Body Map 
2.0"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-513"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"bodymap2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CACG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S0888754312000821"},"Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"cacg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ConjoinG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0013284"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"conjoing.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy prefrontal cortex"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcmedgenomics.biomedcentral.com/articles/10.1186/s12920-016-0164-y"},"BMC Medical Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE68719"},"NCBI GEO")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cortex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Duplicated Genes Database"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0050653"},"PLOS 
ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://dgd.genouest.org/"},"genouest.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"dgd.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"GTEx healthy tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://gtexportal.org/home/"},"gtexportal.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"gtex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"healthy.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Human Protein Atlas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.mcponline.org/article/S1535-9476(20)34633-8/fulltext"},"MCP")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-1733/"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"hpa.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Babiceanu non-cancer tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-cancer_tissues.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor cell 
lines"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor_cells.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions normal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-normal.txt")))),(0,r.kt)("h3",{id:"somatic"},"Somatic"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Nirvana label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Alaei-Mahabadi 18 cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.pnas.org/content/113/48/13768.long"},"PNAS")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"18cancers.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"DepMap CCLE"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://depmap.org/portal/download/"},"depmap.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE Klijn"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080"},"Nature 
Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080#Sec27"},"Nature Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Cancer Genome Project"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cgp.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerKB 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4kb.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerPub 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4pub.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerSeq 
4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4seq.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"COSMIC"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cosmic.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bao gliomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genome.cshlp.org/content/24/11/1765"},"Genome Research")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"gliomas.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Known"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"known.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Mitelman DB"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://mitelmandatabase.isb-cgc.org"},"ISB-CGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://storage.cloud.google.com/mitelman-data-files/prod/mitelman_db.zip"},"Google Cloud")),(0,r.kt)("td",{parentName:"tr",align:"left"},"mitelman.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA oesophageal 
carcinomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature20805"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"oesophagus.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bailey pancreatic cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965#Sec44"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pancreases.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"PCAWG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2018.03.042"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://dcc.icgc.org/releases/PCAWG/transcriptome/fusion"},"ICGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pcawg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Robinson prostate 
cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2015.05.001"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell/fulltext/S0092-8674(15)00548-6?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0092867415005486%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"prostate_cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cancer.gov/about-nci/organization/ccg/research/structural-genomics/tcga"},"cancer.gov")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions tumor"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA Gao"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.celrep.2018.03.050"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell-reports/fulltext/S2211-1247(18)30395-4?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS2211124718303954%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA 
Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TICdb"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-8-33"},"BMC Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genetica.unav.edu/TICdb/allseqs_TICdb.txt"},"unav.edu")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ticdb.txt")))),(0,r.kt)("h2",{id:"gene-pair-tsv-file"},"Gene Pair TSV File"),(0,r.kt)("p",null,"Most of the data files in FusionCatcher are two-column TSV files containing the Ensembl gene IDs that are paired together."),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the 1000genomes.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000006210 ENSG00000102962\nENSG00000006652 ENSG00000181016\nENSG00000014138 ENSG00000149798\nENSG00000026297 ENSG00000071242\nENSG00000035499 ENSG00000155959\nENSG00000055211 ENSG00000131013\nENSG00000055332 ENSG00000179915\nENSG00000062485 ENSG00000257727\nENSG00000065978 ENSG00000166501\nENSG00000066044 ENSG00000104980\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"In Nirvana, we will only import a gene pair if both Ensembl gene IDs are recognized from either our GRCh37 or GRCh38 cache files."),(0,r.kt)("h2",{id:"gene-tsv-file"},"Gene TSV File"),(0,r.kt)("p",null,"Some of the data files are single-column files containing Ensembl gene IDs. 
This is commonly used in the data files representing oncogene data sources."),(0,r.kt)("h3",{id:"example-1"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the oncogenes_more.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000000938\nENSG00000003402\nENSG00000005469\nENSG00000005884\nENSG00000006128\nENSG00000006453\nENSG00000006468\nENSG00000007350\nENSG00000008294\nENSG00000008952\n")),(0,r.kt)("h3",{id:"parsing-1"},"Parsing"),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"FusionCatcher also uses creates custom Ensembl genes (e.g. ",(0,r.kt)("inlineCode",{parentName:"p"},"ENSG09000000002"),") to handle missing Ensembl genes. 
Nirvana will ignore these entries since we only include the gene IDs that are currently recognized by Nirvana."),(0,r.kt)("p",{parentName:"div"},"I suspect that these were originally RefSeq genes and if so, we can support those directly in Nirvana in the future."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sourceforge.net/projects/fusioncatcher/files/data"},"https://sourceforge.net/projects/fusioncatcher/files/data")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSON"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/783965c1.fb65d940.js b/assets/js/783965c1.fb65d940.js deleted file mode 100644 index a1b29739..00000000 --- a/assets/js/783965c1.fb65d940.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6082,7268],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),u=d(n),m=r,v=u["".concat(s,".").concat(m)]||u[m]||p[m]||o;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/dann-json",id:"version-3.21/data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/dann-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],d={toc:s},c="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 0.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 
1.0")))))}u.isMDXComponent=!0},12918:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(59907);const i={title:"DANN"},l=void 0,s={unversionedId:"data-sources/dann",id:"version-3.21/data-sources/dann",title:"DANN",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/dann.mdx",sourceDirName:"data-sources",slug:"/data-sources/dann",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dann",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/dann.mdx",tags:[],version:"3.21",frontMatter:{title:"DANN"},sidebar:"docs",previous:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cosmic"},next:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dbsnp"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"TSV File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"GRCh38 liftover",id:"grch38-liftover",children:[],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},u="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"DANN uses the same feature set and training data as CADD (Combined Annotation-Dependent Depletion) to train a deep neural network (DNN).\nCADD is an algorithm designed to annotate both coding and non-coding variants, and has been shown to outperform other annotation algorithms.\nDANN improves on CADD (which uses Support Vector Machines (SVMs)) by capturing non-linear relationships by using a deep neural 
network instead of SVMs.\nDANN achieves about a 19% relative reduction in the error rate and about a 14% relative increase in the area under the curve (AUC) metric over CADD\u2019s SVM methodology."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Quang, Daniel, Yifei Chen, and Xiaohui Xie. DANN: a deep learning approach for annotating the pathogenicity of genetic variants. ",(0,r.kt)("em",{parentName:"p"},"Bioinformatics")," ",(0,r.kt)("strong",{parentName:"p"},"31.5")," 761-763 (2015). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/bioinformatics/btu703"},"https://doi.org/10.1093/bioinformatics/btu703")))),(0,r.kt)("h2",{id:"tsv-file"},"TSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr grch37_pos ref alt DANN\n1 10001 T A 0.16461391399220135\n1 10001 T C 0.4396994049749739\n1 10001 T G 0.38108629377072734\n1 10002 A C 0.36182020272810128\n1 10002 A G 0.44413258111779291\n1 10002 A T 0.16812846819989813\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we are interested in all columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"grch37_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DANN"))),(0,r.kt)("h2",{id:"grch38-liftover"},"GRCh38 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh38 on DANN website. 
We performed a liftover from GRCh37 to GRCh38 using crossmap."),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("p",null,"None"),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://cbcl.ics.uci.edu/public_data/DANN/"},"https://cbcl.ics.uci.edu/public_data/DANN/")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/79ea6ed3.e7ef5491.js b/assets/js/79ea6ed3.e7ef5491.js deleted file mode 100644 index 21439c67..00000000 --- a/assets/js/79ea6ed3.e7ef5491.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9653],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=r.createContext({}),l=function(e){var t=r.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(s.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(s,".").concat(m)]||u[m]||d[m]||o;return 
n?r.createElement(f,i(i({ref:t},p),{},{components:n})):r.createElement(f,i({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=m;var c={};for(var s in t)hasOwnProperty.call(t,s)&&(c[s]=t[s]);c.originalType=e,c[u]="string"==typeof e?e:a,i[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.18/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],l={toc:s},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,a.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7a836ad3.bb4a1dca.js b/assets/js/7a836ad3.bb4a1dca.js deleted file mode 100644 index 5f9384f3..00000000 --- a/assets/js/7a836ad3.bb4a1dca.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5200],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var s=a.createContext({}),p=function(t){var e=a.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=p(t.components);return 
a.createElement(s.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,s=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),c=p(n),u=r,f=c["".concat(s,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(f,o(o({ref:e},m),{},{components:n})):a.createElement(f,o({ref:e},m))}));function f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=u;var i={};for(var s in e)hasOwnProperty.call(e,s)&&(i[s]=e[s]);i.originalType=t,i[c]="string"==typeof t?t:r,o[1]=i;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>l,metadata:()=>i,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.21/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],p={toc:s},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n 
"numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted 
pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7a86a7ec.9525fce6.js b/assets/js/7a86a7ec.9525fce6.js deleted file mode 100644 index 73457446..00000000 --- a/assets/js/7a86a7ec.9525fce6.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5919],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var s=a.createContext({}),p=function(t){var 
e=a.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=p(t.components);return a.createElement(s.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,s=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),c=p(n),u=r,f=c["".concat(s,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(f,o(o({ref:e},m),{},{components:n})):a.createElement(f,o({ref:e},m))}));function f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=u;var i={};for(var s in e)hasOwnProperty.call(e,s)&&(i[s]=e[s]);i.originalType=t,i[c]="string"==typeof t?t:r,o[1]=i;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>l,metadata:()=>i,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.17/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],p={toc:s},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n 
"hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted 
pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7aa3e760.ef1ac6fe.js b/assets/js/7aa3e760.ef1ac6fe.js deleted file mode 100644 index 54b04dd8..00000000 --- a/assets/js/7aa3e760.ef1ac6fe.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7121],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>h});var i=t(67294);function a(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);n&&(i=i.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,i)}return t}function o(e){for(var n=1;n=0||(a[t]=e[t]);return a}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(a[t]=e[t])}return a}var s=i.createContext({}),c=function(e){var 
n=i.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var n=c(e.components);return i.createElement(s.Provider,{value:n},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return i.createElement(i.Fragment,{},n)}},d=i.forwardRef((function(e,n){var t=e.components,a=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=c(t),d=a,h=u["".concat(s,".").concat(d)]||u[d]||m[d]||r;return t?i.createElement(h,o(o({ref:n},p),{},{components:t})):i.createElement(h,o({ref:n},p))}));function h(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var r=t.length,o=new Array(r);o[0]=d;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[u]="string"==typeof e?e:a,o[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var i=t(87462),a=(t(67294),t(3905));const r={title:"Jasix"},o=void 0,l={unversionedId:"utilities/jasix",id:"version-3.17/utilities/jasix",title:"Jasix",description:"Overview",source:"@site/versioned_docs/version-3.17/utilities/jasix.mdx",sourceDirName:"utilities",slug:"/utilities/jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/utilities/jasix",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/utilities/jasix.mdx",tags:[],version:"3.17",frontMatter:{title:"Jasix"},sidebar:"version-3.17/docs",previous:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/variant-ids"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Creating the Jasix index",id:"creating-the-jasix-index",children:[{value:"Example",id:"example",children:[],level:3}],level:2},{value:"Querying the index",id:"querying-the-index",children:[],level:2},{value:"Extracting a section",id:"extracting-a-section",children:[],level:2}],c={toc:s},p="wrapper";function 
u(e){let{components:n,...t}=e;return(0,a.kt)(p,(0,i.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,a.kt)("h2",{id:"overview"},"Overview"),(0,a.kt)("p",null,"The Jasix index is aimed at providing TABIX like indexing capabilities for the Nirvana JSON output."),(0,a.kt)("h2",{id:"creating-the-jasix-index"},"Creating the Jasix index"),(0,a.kt)("p",null,"The Jasix index (that comes in a .jsi) file is generated on-the-fly with Nirvana output. It can also be generated independently by running the Jasix command line utility on the JSON output file. Please note that the Jasix utility can only consume JSON files that follow the Nirvana JSON output format. The following code blocks demonstrate the help menu and index generating functionalities of Jasix."),(0,a.kt)("h3",{id:"example"},"Example"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -h\nUSAGE: dotnet Jasix.dll -i in.json.gz [options]\nIndexes a Nirvana annotated JSON file\n\nOPTIONS:\n --header, -t print also the header lines\n --only-header, -H print only the header lines\n --chromosomes, -l list chromosome names\n --index, -c create index\n --in, -i input\n --out, -o compressed output file name (default:console)\n --query, -q query range\n --section, -s complete section (positions or genes) to output\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll --index -i input.json.gz\n---------------------------------------------------------------------------\nJasix (c) 2017 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 2.0.0\n---------------------------------------------------------------------------\n\nRef Sequence chrM indexed in 00:00:00.2\nRef Sequence chr1 indexed in 00:00:05.8\nRef Sequence chr2 indexed in 00:00:06.0\n.\n.\n.\nPeak memory usage: 28.5 MB\nTime: 00:01:14.8\n")),(0,a.kt)("h2",{id:"querying-the-index"},"Querying 
the index"),(0,a.kt)("p",null,"The Jasix query format is chr:start-end. If not provided, it assumes end=start. If only chr is provided, all entries for that chromosome will be provided."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz chrM:5000-7000\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n }\n ]\n}\n\n')),(0,a.kt)("p",null,'The default output stream is Console. However, if an output filename is provided, Jasix outputs the results to that file in a bgzip compressed format. The output is always a valid JSON entry. If requested (via -t option) the header of the indexed file will be provided. 
Multiple queries can be submitted in the same command and the output will contain them within the same "positions" block in order of the submitted queries (Warning: if the queries are out of order, or overlapping, the output will be out or order and intersecting).'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -q chrM:5000-7000 -q chrM:8500-9500 -t\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"Illumina Annotation Engine 1.6.2.0",\n "creationTime":"2017-08-30 11:42:57",\n "genomeAssembly":"GRCh37",\n "schemaVersion":6,\n "dataVersion":"84.24.39",\n "dataSources":[\n {\n "name":"VEP",\n "version":"84",\n "description":"Ensembl",\n "releaseDate":"2017-01-16"\n }\n ],\n "samples":[\n "Mother"\n ]\n },\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":8702,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":0.9987,\n 
"totalDepth":1534,\n "genotypeQuality":1,\n "alleleDepths":[\n 2,\n 1532\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":8702,\n "chromosome":"chrM",\n "end":8702,\n "variantType":"SNV",\n "vid":"MT:8702:A"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":9378,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1018,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1018\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":9378,\n "chromosome":"chrM",\n "end":9378,\n "variantType":"SNV",\n "vid":"MT:9378:A"\n }\n ]\n }\n ]\n}\n')),(0,a.kt)("h2",{id:"extracting-a-section"},"Extracting a section"),(0,a.kt)("p",null,"The Nirvana JSON file has three sections: header, positions and genes. Header can be printed using the -H option. If you are interested in only the positions or genes section, you can use the -s or --section option."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -s genes\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'[\n{\n "name": "ABCB10",\n "omim": [\n {\n "mimNumber": 605454,\n "geneName": "ATP-binding cassette, subfamily B, member 10"\n }\n ]\n},\n{\n "name": "ABCD3",\n "omim": [\n {\n "mimNumber": 170995,\n "geneName": "ATP-binding cassette, subfamily D, member 3 (peroxisomal membrane protein 1, 70kD)",\n "description": "The ABCD3 gene encodes a peroxisomal membrane transporter involved in the transport of branched-chain fatty acids and C27 bile acids into the peroxisome; the latter function is a crucial step in bile acid biosynthesis (summary by Ferdinandusse et al., 2015).",\n "phenotypes": [\n {\n "mimNumber": 616278,\n "phenotype": "?Bile acid synthesis defect, congenital, 5",\n "mapping": "molecular basis of the disorder is 
known",\n "inheritances": [\n "Autosomal recessive"\n ],\n "comments": [\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n ]\n}\n]\n')))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7b3bfa5e.445b93c4.js b/assets/js/7b3bfa5e.445b93c4.js deleted file mode 100644 index 0a6ecc36..00000000 --- a/assets/js/7b3bfa5e.445b93c4.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3084,1633],{3905:(e,n,t)=>{t.d(n,{Zo:()=>d,kt:()=>h});var a=t(67294);function o(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(o[t]=e[t]);return o}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(o[t]=e[t])}return o}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},d=function(e){var n=c(e.components);return a.createElement(l.Provider,{value:n},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},p=a.forwardRef((function(e,n){var t=e.components,o=e.mdxType,r=e.originalType,l=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),u=c(t),p=o,h=u["".concat(l,".").concat(p)]||u[p]||m[p]||r;return t?a.createElement(h,i(i({ref:n},d),{},{components:t})):a.createElement(h,i({ref:n},d))}));function h(e,n){var t=arguments,o=n&&n.mdxType;if("string"==typeof e||o){var r=t.length,i=new Array(r);i[0]=p;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[u]="string"==typeof e?e:o,i[1]=s;for(var 
c=2;c{t.r(n),t.d(n,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>s,toc:()=>l});var a=t(87462),o=(t(67294),t(3905));const r={},i=void 0,s={unversionedId:"data-sources/amino-acid-conservation-json",id:"data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/amino-acid-conservation-json.md",tags:[],version:"current",frontMatter:{}},l=[],c={toc:l},d="wrapper";function u(e){let{components:n,...t}=e;return(0,o.kt)(d,(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} \n')),(0,o.kt)("table",null,(0,o.kt)("thead",{parentName:"table"},(0,o.kt)("tr",{parentName:"thead"},(0,o.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,o.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,o.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,o.kt)("tbody",{parentName:"table"},(0,o.kt)("tr",{parentName:"tbody"},(0,o.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,o.kt)("td",{parentName:"tr",align:"center"},"object"),(0,o.kt)("td",{parentName:"tr",align:"left"})),(0,o.kt)("tr",{parentName:"tbody"},(0,o.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,o.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,o.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. 
Range: 0.01 - 1.00")))))}u.isMDXComponent=!0},17041:(e,n,t)=>{t.r(n),t.d(n,{contentTitle:()=>s,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>c});var a=t(87462),o=(t(67294),t(3905)),r=t(99729);const i={title:"Amino Acid Conservation"},s=void 0,l={unversionedId:"data-sources/amino-acid-conservation",id:"data-sources/amino-acid-conservation",title:"Amino Acid Conservation",description:"Overview",source:"@site/docs/data-sources/amino-acid-conservation.mdx",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/amino-acid-conservation.mdx",tags:[],version:"current",frontMatter:{title:"Amino Acid Conservation"},sidebar:"docs",previous:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes"},next:{title:"Cancer Hotspots",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cancer-hotspots"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"FASTA File",id:"fasta-file",children:[],level:2},{value:"Parsing FASTA",id:"parsing-fasta",children:[],level:2},{value:"Assigning scores to Illumina Connected Annotations transcripts",id:"assigning-scores-to-illumina-connected-annotations-transcripts",children:[{value:"GRCh37",id:"grch37",children:[],level:3},{value:"GRCh38",id:"grch38",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:c},u="wrapper";function m(e){let{components:n,...t}=e;return(0,o.kt)(u,(0,a.Z)({},d,t,{components:n,mdxType:"MDXLayout"}),(0,o.kt)("h2",{id:"overview"},"Overview"),(0,o.kt)("p",null,"Amino acid conservation scores are obtained from multiple alignments of vertebrate exomes to the human ones. 
The score indicate the frequency with which a particular AA is observed in Humans."),(0,o.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,o.kt)("div",{parentName:"div",className:"admonition-heading"},(0,o.kt)("h5",{parentName:"div"},(0,o.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,o.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,o.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,o.kt)("div",{parentName:"div",className:"admonition-content"},(0,o.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. ",(0,o.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. 
(",(0,o.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,o.kt)("h2",{id:"fasta-file"},"FASTA File"),(0,o.kt)("p",null,"The exon alignments are provided in FASTA files as follows:"),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-scss"},">ENST00000641515.2_hg38_1_2 3 0 0 chr1:65565-65573+\nMKK\n>ENST00000641515.2_panTro4_1_2 3 0 0 chrUn_GL393541:146907-146915+\nMKK\n>ENST00000641515.2_gorGor3_1_2 3 0 0\n---\n>ENST00000641515.2_ponAbe2_1_2 3 0 0 chr15:99141417-99141425-\nMKK\n>ENST00000641515.2_hg38_2_2 324 0 0 chr1:69037-70008+\nVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ\n>ENST00000641515.2_panTro4_2_2 324 0 0 chrUn_GL393541:151333-152303+\n")),(0,o.kt)("h2",{id:"parsing-fasta"},"Parsing FASTA"),(0,o.kt)("p",null,"For each Ensembl transcript, we will need to aggregate all the exons together for each of the 100 species. From there, we should get a full alignment that can be used to determine conservation. 
For example, for ENST00000641515.2 we have:"),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-scss"},"Human (hg38) MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nChimp MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFL-MLFFVFYGGIVFGNLLIVRIVVSDSHLHSPMYFLLANLSLIDLSLCSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGorilla ----------------------------------------------------------------------------------------------------------------------\nOrangutan MKKVTAEAISWNESTSKTNNSVVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVIIVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGibbon ----------------------------------------------------------------------------------------------------------------------\nRhesus MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVVDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\nMacaque MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVIDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\n")),(0,o.kt)("p",null,"If we look at position 6, we see that humans have an Alanine (A) residue. This residue is shared by Chimp and Orangutan. However, Rhesus and Macaque have a Glutamic acid (E) residue at that position. Moreover, Gorilla and Gibbon don't even have data for that transcript.\nFor position 6, we would say that we have 43% conservation (3/7) since three organisms share the same residue as humans."),(0,o.kt)("h2",{id:"assigning-scores-to-illumina-connected-annotations-transcripts"},"Assigning scores to Illumina Connected Annotations transcripts"),(0,o.kt)("p",null,"The source FASTA file comes with Ensembl/UCSC transcript ids of the transcripts used for alignments. The Illumina Connected Annotations cache has RefSeq and Ensembl transcripts and our first attempt was to map the given Ensembl/UCSC ids to their equivalent RefSeq/Ensembl ids. This attempt was unsuccessful since UCSC Table Browser provided mapping without version numbers. 
So we proceeded as follows:"),(0,o.kt)("ul",null,(0,o.kt)("li",{parentName:"ul"},"Take proteins which have a unique mapping (and hence one set of conservation scores). For ones that mapped to both ChrX and ChrY, we accepted the one from ChrX."),(0,o.kt)("li",{parentName:"ul"},"A Illumina Connected Annotations transcript having an exact peptide sequence match with a uniquely aligned protein is assigned the corresponding conservation scores.")),(0,o.kt)("p",null,"Unfortunately this left us with a very small number of transcripts having conservation scores."),(0,o.kt)("h3",{id:"grch37"},"GRCh37"),(0,o.kt)("ul",null,(0,o.kt)("li",{parentName:"ul"},"Source FASTA contained 41957 protein alignments."),(0,o.kt)("li",{parentName:"ul"},"38165 proteins had unique scores."),(0,o.kt)("li",{parentName:"ul"},"88 aligned proteins existed in Illumina Connected Annotations cache."),(0,o.kt)("li",{parentName:"ul"},"118 transcripts had conservation scores.")),(0,o.kt)("h3",{id:"grch38"},"GRCh38"),(0,o.kt)("ul",null,(0,o.kt)("li",{parentName:"ul"},"Source FASTA contained 110024 protein alignments."),(0,o.kt)("li",{parentName:"ul"},"88961 proteins had unique scores."),(0,o.kt)("li",{parentName:"ul"},"11688 aligned proteins existed in Illumina Connected Annotations cache."),(0,o.kt)("li",{parentName:"ul"},"12098 transcripts had conservation scores.")),(0,o.kt)("h2",{id:"download-url"},"Download URL"),(0,o.kt)("p",null,"GRCh37: ",(0,o.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,o.kt)("p",null,"GRCh38: ",(0,o.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,o.kt)("h2",{id:"json-output"},"JSON Output"),(0,o.kt)("p",null,"Conservation 
scores are reported in the transcript section. One score is reported for each alt allele"),(0,o.kt)(r.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7b3bfa5e.eaf4c632.js b/assets/js/7b3bfa5e.eaf4c632.js new file mode 100644 index 00000000..190ec60b --- /dev/null +++ b/assets/js/7b3bfa5e.eaf4c632.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3389,1633],{3905:(e,n,t)=>{t.d(n,{Zo:()=>d,kt:()=>h});var a=t(7294);function o(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(o[t]=e[t]);return o}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(o[t]=e[t])}return o}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},d=function(e){var n=c(e.components);return a.createElement(l.Provider,{value:n},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},p=a.forwardRef((function(e,n){var t=e.components,o=e.mdxType,r=e.originalType,l=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),u=c(t),p=o,h=u["".concat(l,".").concat(p)]||u[p]||m[p]||r;return t?a.createElement(h,i(i({ref:n},d),{},{components:t})):a.createElement(h,i({ref:n},d))}));function h(e,n){var t=arguments,o=n&&n.mdxType;if("string"==typeof e||o){var r=t.length,i=new Array(r);i[0]=p;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[u]="string"==typeof e?e:o,i[1]=s;for(var 
c=2;c{t.r(n),t.d(n,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>s,toc:()=>l});var a=t(7462),o=(t(7294),t(3905));const r={},i=void 0,s={unversionedId:"data-sources/amino-acid-conservation-json",id:"data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/amino-acid-conservation-json.md",tags:[],version:"current",frontMatter:{}},l=[],c={toc:l},d="wrapper";function u(e){let{components:n,...t}=e;return(0,o.kt)(d,(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} \n')),(0,o.kt)("table",null,(0,o.kt)("thead",{parentName:"table"},(0,o.kt)("tr",{parentName:"thead"},(0,o.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,o.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,o.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,o.kt)("tbody",{parentName:"table"},(0,o.kt)("tr",{parentName:"tbody"},(0,o.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,o.kt)("td",{parentName:"tr",align:"center"},"object"),(0,o.kt)("td",{parentName:"tr",align:"left"})),(0,o.kt)("tr",{parentName:"tbody"},(0,o.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,o.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,o.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. 
Range: 0.01 - 1.00")))))}u.isMDXComponent=!0},1877:(e,n,t)=>{t.r(n),t.d(n,{contentTitle:()=>s,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>c});var a=t(7462),o=(t(7294),t(3905)),r=t(9729);const i={title:"Amino Acid Conservation"},s=void 0,l={unversionedId:"data-sources/amino-acid-conservation",id:"data-sources/amino-acid-conservation",title:"Amino Acid Conservation",description:"Overview",source:"@site/docs/data-sources/amino-acid-conservation.mdx",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/amino-acid-conservation.mdx",tags:[],version:"current",frontMatter:{title:"Amino Acid Conservation"},sidebar:"docs",previous:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes"},next:{title:"Cancer Hotspots",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cancer-hotspots"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"FASTA File",id:"fasta-file",children:[],level:2},{value:"Parsing FASTA",id:"parsing-fasta",children:[],level:2},{value:"Assigning scores to Illumina Connected Annotations transcripts",id:"assigning-scores-to-illumina-connected-annotations-transcripts",children:[{value:"GRCh37",id:"grch37",children:[],level:3},{value:"GRCh38",id:"grch38",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:c},u="wrapper";function m(e){let{components:n,...t}=e;return(0,o.kt)(u,(0,a.Z)({},d,t,{components:n,mdxType:"MDXLayout"}),(0,o.kt)("h2",{id:"overview"},"Overview"),(0,o.kt)("p",null,"Amino acid conservation scores are obtained from multiple alignments of vertebrate exomes to the human ones. 
The score indicate the frequency with which a particular AA is observed in Humans."),(0,o.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,o.kt)("div",{parentName:"div",className:"admonition-heading"},(0,o.kt)("h5",{parentName:"div"},(0,o.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,o.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,o.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,o.kt)("div",{parentName:"div",className:"admonition-content"},(0,o.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. ",(0,o.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. 
(",(0,o.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,o.kt)("h2",{id:"fasta-file"},"FASTA File"),(0,o.kt)("p",null,"The exon alignments are provided in FASTA files as follows:"),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-scss"},">ENST00000641515.2_hg38_1_2 3 0 0 chr1:65565-65573+\nMKK\n>ENST00000641515.2_panTro4_1_2 3 0 0 chrUn_GL393541:146907-146915+\nMKK\n>ENST00000641515.2_gorGor3_1_2 3 0 0\n---\n>ENST00000641515.2_ponAbe2_1_2 3 0 0 chr15:99141417-99141425-\nMKK\n>ENST00000641515.2_hg38_2_2 324 0 0 chr1:69037-70008+\nVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ\n>ENST00000641515.2_panTro4_2_2 324 0 0 chrUn_GL393541:151333-152303+\n")),(0,o.kt)("h2",{id:"parsing-fasta"},"Parsing FASTA"),(0,o.kt)("p",null,"For each Ensembl transcript, we will need to aggregate all the exons together for each of the 100 species. From there, we should get a full alignment that can be used to determine conservation. 
For example, for ENST00000641515.2 we have:"),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-scss"},"Human (hg38) MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nChimp MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFL-MLFFVFYGGIVFGNLLIVRIVVSDSHLHSPMYFLLANLSLIDLSLCSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGorilla ----------------------------------------------------------------------------------------------------------------------\nOrangutan MKKVTAEAISWNESTSKTNNSVVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVIIVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGibbon ----------------------------------------------------------------------------------------------------------------------\nRhesus MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVVDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\nMacaque MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVIDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\n")),(0,o.kt)("p",null,"If we look at position 6, we see that humans have an Alanine (A) residue. This residue is shared by Chimp and Orangutan. However, Rhesus and Macaque have a Glutamic acid (E) residue at that position. Moreover, Gorilla and Gibbon don't even have data for that transcript.\nFor position 6, we would say that we have 43% conservation (3/7) since three organisms share the same residue as humans."),(0,o.kt)("h2",{id:"assigning-scores-to-illumina-connected-annotations-transcripts"},"Assigning scores to Illumina Connected Annotations transcripts"),(0,o.kt)("p",null,"The source FASTA file comes with Ensembl/UCSC transcript ids of the transcripts used for alignments. The Illumina Connected Annotations cache has RefSeq and Ensembl transcripts and our first attempt was to map the given Ensembl/UCSC ids to their equivalent RefSeq/Ensembl ids. This attempt was unsuccessful since UCSC Table Browser provided mapping without version numbers. 
So we proceeded as follows:"),(0,o.kt)("ul",null,(0,o.kt)("li",{parentName:"ul"},"Take proteins which have a unique mapping (and hence one set of conservation scores). For ones that mapped to both ChrX and ChrY, we accepted the one from ChrX."),(0,o.kt)("li",{parentName:"ul"},"A Illumina Connected Annotations transcript having an exact peptide sequence match with a uniquely aligned protein is assigned the corresponding conservation scores.")),(0,o.kt)("p",null,"Unfortunately this left us with a very small number of transcripts having conservation scores."),(0,o.kt)("h3",{id:"grch37"},"GRCh37"),(0,o.kt)("ul",null,(0,o.kt)("li",{parentName:"ul"},"Source FASTA contained 41957 protein alignments."),(0,o.kt)("li",{parentName:"ul"},"38165 proteins had unique scores."),(0,o.kt)("li",{parentName:"ul"},"88 aligned proteins existed in Illumina Connected Annotations cache."),(0,o.kt)("li",{parentName:"ul"},"118 transcripts had conservation scores.")),(0,o.kt)("h3",{id:"grch38"},"GRCh38"),(0,o.kt)("ul",null,(0,o.kt)("li",{parentName:"ul"},"Source FASTA contained 110024 protein alignments."),(0,o.kt)("li",{parentName:"ul"},"88961 proteins had unique scores."),(0,o.kt)("li",{parentName:"ul"},"11688 aligned proteins existed in Illumina Connected Annotations cache."),(0,o.kt)("li",{parentName:"ul"},"12098 transcripts had conservation scores.")),(0,o.kt)("h2",{id:"download-url"},"Download URL"),(0,o.kt)("p",null,"GRCh37: ",(0,o.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,o.kt)("p",null,"GRCh38: ",(0,o.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,o.kt)("h2",{id:"json-output"},"JSON Output"),(0,o.kt)("p",null,"Conservation 
scores are reported in the transcript section. One score is reported for each alt allele"),(0,o.kt)(r.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7b7aa079.325e751c.js b/assets/js/7b7aa079.325e751c.js deleted file mode 100644 index e99a15fa..00000000 --- a/assets/js/7b7aa079.325e751c.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6754],{3905:(t,n,e)=>{e.d(n,{Zo:()=>m,kt:()=>k});var a=e(67294);function l(t,n,e){return n in t?Object.defineProperty(t,n,{value:e,enumerable:!0,configurable:!0,writable:!0}):t[n]=e,t}function r(t,n){var e=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable}))),e.push.apply(e,a)}return e}function o(t){for(var n=1;n=0||(l[e]=t[e]);return l}(t,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,e)&&(l[e]=t[e])}return l}var p=a.createContext({}),u=function(t){var n=a.useContext(p),e=n;return t&&(e="function"==typeof t?t(n):o(o({},n),t)),e},m=function(t){var n=u(t.components);return a.createElement(p.Provider,{value:n},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var n=t.children;return a.createElement(a.Fragment,{},n)}},N=a.forwardRef((function(t,n){var e=t.components,l=t.mdxType,r=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(e),N=l,k=d["".concat(p,".").concat(N)]||d[N]||g[N]||r;return e?a.createElement(k,o(o({ref:n},m),{},{components:e})):a.createElement(k,o({ref:n},m))}));function k(t,n){var e=arguments,l=n&&n.mdxType;if("string"==typeof t||l){var r=e.length,o=new Array(r);o[0]=N;var i={};for(var p in n)hasOwnProperty.call(n,p)&&(i[p]=n[p]);i.originalType=t,i[d]="string"==typeof t?t:l,o[1]=i;for(var 
u=2;u{e.r(n),e.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>i,toc:()=>p});var a=e(87462),l=(e(67294),e(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.18/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],u={toc:p},m="wrapper";function d(t){let{components:n,...e}=t;return(0,l.kt)(m,(0,a.Z)({},u,e,{components:n,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n 
"failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. 
Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7bc16216.b0648cfe.js b/assets/js/7bc16216.b0648cfe.js new file mode 100644 index 00000000..e9fb9fa3 --- /dev/null +++ b/assets/js/7bc16216.b0648cfe.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3232],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>k});var a=n(7294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var o=a.createContext({}),s=function(t){var e=a.useContext(o),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=s(t.components);return a.createElement(o.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var 
n=t.components,r=t.mdxType,l=t.originalType,o=t.parentName,m=p(t,["components","mdxType","originalType","parentName"]),c=s(n),u=r,k=c["".concat(o,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(k,i(i({ref:e},m),{},{components:n})):a.createElement(k,i({ref:e},m))}));function k(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,i=new Array(l);i[0]=u;var p={};for(var o in e)hasOwnProperty.call(e,o)&&(p[o]=e[o]);p.originalType=t,p[c]="string"==typeof t?t:r,i[1]=p;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>l,metadata:()=>p,toc:()=>o});var a=n(7462),r=(n(7294),n(3905));const l={},i=void 0,p={unversionedId:"data-sources/clinvar-json",id:"data-sources/clinvar-json",title:"clinvar-json",description:"small variants:",source:"@site/docs/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clinvar-json.md",tags:[],version:"current",frontMatter:{}},o=[],s={toc:o},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"small variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n 
"lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"large variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n "pathogenic"\n ], \n "lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n "significance":[\n "pathogenic"\n ],\n "lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single 
variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7bc16216.d7ca5277.js b/assets/js/7bc16216.d7ca5277.js deleted file mode 100644 index d5cf7b7b..00000000 --- a/assets/js/7bc16216.d7ca5277.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3232],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>k});var 
a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var o=a.createContext({}),s=function(t){var e=a.useContext(o),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=s(t.components);return a.createElement(o.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,o=t.parentName,m=p(t,["components","mdxType","originalType","parentName"]),c=s(n),u=r,k=c["".concat(o,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(k,i(i({ref:e},m),{},{components:n})):a.createElement(k,i({ref:e},m))}));function k(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,i=new Array(l);i[0]=u;var p={};for(var o in e)hasOwnProperty.call(e,o)&&(p[o]=e[o]);p.originalType=t,p[c]="string"==typeof t?t:r,i[1]=p;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>l,metadata:()=>p,toc:()=>o});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,p={unversionedId:"data-sources/clinvar-json",id:"data-sources/clinvar-json",title:"clinvar-json",description:"small 
variants:",source:"@site/docs/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clinvar-json.md",tags:[],version:"current",frontMatter:{}},o=[],s={toc:o},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"small variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"large variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n "pathogenic"\n ], \n "lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n "significance":[\n "pathogenic"\n ],\n 
"lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice 
guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7c03f26c.382b229c.js 
b/assets/js/7c03f26c.382b229c.js deleted file mode 100644 index 87f2f768..00000000 --- a/assets/js/7c03f26c.382b229c.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3966],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>d});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var i=r.createContext({}),m=function(t){var e=r.useContext(i),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=m(t.components);return r.createElement(i.Provider,{value:e},t.children)},s="mdxType",f={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,l=t.originalType,i=t.parentName,c=p(t,["components","mdxType","originalType","parentName"]),s=m(n),u=a,d=s["".concat(i,".").concat(u)]||s[u]||f[u]||l;return n?r.createElement(d,o(o({ref:e},c),{},{components:n})):r.createElement(d,o({ref:e},c))}));function d(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=u;var p={};for(var i in e)hasOwnProperty.call(e,i)&&(p[i]=e[i]);p.originalType=t,p[s]="string"==typeof t?t:a,o[1]=p;for(var m=2;m{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>p,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 
0,p={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.17/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.17",frontMatter:{}},i=[],m={toc:i},c="wrapper";function s(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. 
Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. 
Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. Non-zero integer.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7d23ebcb.782a6747.js b/assets/js/7d23ebcb.782a6747.js deleted file mode 100644 index d40e401c..00000000 --- a/assets/js/7d23ebcb.782a6747.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6698],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>d});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var i=r.createContext({}),m=function(t){var e=r.useContext(i),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=m(t.components);return r.createElement(i.Provider,{value:e},t.children)},s="mdxType",f={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var 
n=t.components,a=t.mdxType,l=t.originalType,i=t.parentName,c=p(t,["components","mdxType","originalType","parentName"]),s=m(n),u=a,d=s["".concat(i,".").concat(u)]||s[u]||f[u]||l;return n?r.createElement(d,o(o({ref:e},c),{},{components:n})):r.createElement(d,o({ref:e},c))}));function d(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=u;var p={};for(var i in e)hasOwnProperty.call(e,i)&&(p[i]=e[i]);p.originalType=t,p[s]="string"==typeof t?t:a,o[1]=p;for(var m=2;m{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>p,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,p={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.18/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.18",frontMatter:{}},i=[],m={toc:i},c="wrapper";function s(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n 
"sasAc":170\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. 
Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. 
Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/7ec526d1.3698025a.js b/assets/js/7ec526d1.3698025a.js deleted file mode 100644 index 4145edb4..00000000 --- a/assets/js/7ec526d1.3698025a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1870],{3905:(M,L,t)=>{t.d(L,{Zo:()=>o,kt:()=>C});var i=t(67294);function e(M,L,t){return L in M?Object.defineProperty(M,L,{value:t,enumerable:!0,configurable:!0,writable:!0}):M[L]=t,M}function j(M,L){var t=Object.keys(M);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(M);L&&(i=i.filter((function(L){return Object.getOwnPropertyDescriptor(M,L).enumerable}))),t.push.apply(t,i)}return t}function u(M){for(var L=1;L=0||(e[t]=M[t]);return e}(M,L);if(Object.getOwnPropertySymbols){var j=Object.getOwnPropertySymbols(M);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(M,t)&&(e[t]=M[t])}return e}var N=i.createContext({}),n=function(M){var L=i.useContext(N),t=L;return M&&(t="function"==typeof M?M(L):u(u({},L),M)),t},o=function(M){var L=n(M.components);return i.createElement(N.Provider,{value:L},M.children)},s="mdxType",w={inlineCode:"code",wrapper:function(M){var L=M.children;return i.createElement(i.Fragment,{},L)}},y=i.forwardRef((function(M,L){var t=M.components,e=M.mdxType,j=M.originalType,N=M.parentName,o=a(M,["components","mdxType","originalType","parentName"]),s=n(t),y=e,C=s["".concat(N,".").concat(y)]||s[y]||w[y]||j;return t?i.createElement(C,u(u({ref:L},o),{},{components:t})):i.createElement(C,u({ref:L},o))}));function C(M,L){var t=arguments,e=L&&L.mdxType;if("string"==typeof M||e){var j=t.length,u=new Array(j);u[0]=y;var a={};for(var N in L)hasOwnProperty.call(L,N)&&(a[N]=L[N]);a.originalType=M,a[s]="string"==typeof M?M:e,u[1]=a;for(var n=2;n{t.d(L,{Z:()=>e});var i=t(67294);function e(M){let{className:L,name:t,children:e,githubUrl:j,twitterUrl:u}=M;return 
i.createElement("div",{className:L},i.createElement("div",{className:"card card--full-height"},i.createElement("div",{className:"card__header"},i.createElement("div",{className:"avatar avatar--vertical"},i.createElement("img",{className:"avatar__photo avatar__photo--xl",src:j+".png"}),i.createElement("div",{className:"avatar__intro"},i.createElement("h3",{className:"avatar__name"},t)))),i.createElement("div",{className:"card__body"},e),i.createElement("div",{className:"card__footer"},i.createElement("div",{className:"button-group button-group--block"},j&&i.createElement("a",{className:"button button--secondary",href:j},"GitHub"),u&&i.createElement("a",{className:"button button--secondary",href:u},"Twitter")))))}},35002:(M,L,t)=>{t.r(L),t.d(L,{TeamProfileCardCol:()=>o,contentTitle:()=>a,default:()=>y,frontMatter:()=>u,metadata:()=>N,toc:()=>n});var i=t(87462),e=(t(67294),t(3905)),j=t(63427);const u={id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},a=void 0,N={unversionedId:"introduction/introduction",id:"version-3.21/introduction/introduction",title:"Introduction",description:"Clinical-grade variant annotation",source:"@site/versioned_docs/version-3.21/introduction/introduction.mdx",sourceDirName:"introduction",slug:"/",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/introduction/introduction.mdx",tags:[],version:"3.21",frontMatter:{id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},sidebar:"docs",next:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/dependencies"}},n=[{value:"What does Nirvana 
annotate?",id:"what-does-nirvana-annotate",children:[],level:2},{value:"Licensing",id:"licensing",children:[{value:"Code",id:"code",children:[],level:3},{value:"Data",id:"data",children:[],level:3}],level:2},{value:"Nirvana Team",id:"nirvana-team",children:[{value:"Active Team",id:"active-team",children:[],level:3},{value:"Honorary Alumni",id:"honorary-alumni",children:[],level:3}],level:2}];function o(M){return(0,e.kt)(j.Z,(0,i.Z)({},M,{className:"col col--6 margin-bottom--lg",mdxType:"TeamProfileCard"}))}const s={toc:n,TeamProfileCardCol:o},w="wrapper";function y(M){let{components:L,...j}=M;return(0,e.kt)(w,(0,i.Z)({},s,j,{components:L,mdxType:"MDXLayout"}),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(31958).Z})),(0,e.kt)("p",null,"Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs (including CNVs). It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation."),(0,e.kt)("p",null,"The input to Nirvana are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Nirvana handles multiple alternate alleles and multiple samples with ease."),(0,e.kt)("p",null,"The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. 
Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily."),(0,e.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,e.kt)("div",{parentName:"div",className:"admonition-heading"},(0,e.kt)("h5",{parentName:"div"},(0,e.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,e.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,e.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Fun Fact")),(0,e.kt)("div",{parentName:"div",className:"admonition-content"},(0,e.kt)("p",{parentName:"div"},"Nirvana is a backronym for ",(0,e.kt)("strong",{parentName:"p"},"NI"),"mble and ",(0,e.kt)("strong",{parentName:"p"},"R"),"obust ",(0,e.kt)("strong",{parentName:"p"},"VA"),"riant a",(0,e.kt)("strong",{parentName:"p"},"N"),"not",(0,e.kt)("strong",{parentName:"p"},"A"),"tor"))),(0,e.kt)("h2",{id:"what-does-nirvana-annotate"},"What does Nirvana annotate?"),(0,e.kt)("p",null,"We use Sequence Ontology consequences to describe how each variant impacts a given transcript:"),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(84770).Z})),(0,e.kt)("p",null,"In addition, we also use external data sources to provide additional context for each variant:"),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(92425).Z})),(0,e.kt)("h2",{id:"licensing"},"Licensing"),(0,e.kt)("h3",{id:"code"},"Code"),(0,e.kt)("p",null,"Nirvana source code is provided under the 
",(0,e.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/blob/develop/LICENSE"},"GPLv3")," license. Nirvana includes several third party packages provided under other open source licenses, please see ",(0,e.kt)("a",{parentName:"p",href:"introduction/dependencies"},"Dependencies")," for additional details."),(0,e.kt)("h3",{id:"data"},"Data"),(0,e.kt)("p",null,"The data used by Nirvana is publicly available, however some data sources have special restrictions on use by non-academic entities."),(0,e.kt)("h2",{id:"nirvana-team"},"Nirvana Team"),(0,e.kt)("h3",{id:"active-team"},"Active Team"),(0,e.kt)("p",null,"The Nirvana team works on the core functionality, AWS annotation services, in addition to keeping the annotation data sources up-to-date."),(0,e.kt)("p",null,"Current members of the Nirvana team are listed in alphabetical order below."),(0,e.kt)("div",{className:"row"},(0,e.kt)(o,{name:"Fahd Siddiqui",githubUrl:"https://github.com/Fahd-Siddiqui",mdxType:"TeamProfileCardCol"},"Joined our team back in December 2021 and brings even more cloud and ML experience to our team."),(0,e.kt)(o,{name:"Joseph Platzer",githubUrl:"https://github.com/jplatzer2",mdxType:"TeamProfileCardCol"},"Test Lead. Joins Nirvana with a history of building sequencing tools and keeping the customer first."),(0,e.kt)(o,{name:"Michael Str\xf6mberg",githubUrl:"https://github.com/MichaelStromberg",mdxType:"TeamProfileCardCol"},"Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it."),(0,e.kt)(o,{name:"Ningxin Ouyang",githubUrl:"https://github.com/N-Ouyang",mdxType:"TeamProfileCardCol"},"Our newest addition to the team with a wealth of experience in transcript factor footprinting."),(0,e.kt)(o,{name:"Rajat Shuvro Roy",githubUrl:"https://github.com/rajatshuvro",mdxType:"TeamProfileCardCol"},"Lead developer. 
Loves to speed up things and make services available to all interested users.")),(0,e.kt)("h3",{id:"honorary-alumni"},"Honorary Alumni"),(0,e.kt)("p",null,"Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things."),(0,e.kt)("div",{className:"row"},(0,e.kt)(o,{name:"Haochen Li",githubUrl:"https://github.com/haochenl",mdxType:"TeamProfileCardCol"},"Detail-oriented quick thinker that keeps cool even in the most stressful situations. Now working as a Senior Bioinformatics Data Scientist at GRAIL."),(0,e.kt)(o,{name:"Julien Lajugie",githubUrl:"https://github.com/JulienLajugie",mdxType:"TeamProfileCardCol"},"Julien is a legend around these parts. When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place."),(0,e.kt)(o,{name:"Shuli Kang",githubUrl:"https://github.com/shulik7",mdxType:"TeamProfileCardCol"},"Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies."),(0,e.kt)(o,{name:"Yu Jiang",githubUrl:"https://github.com/yujiang02",mdxType:"TeamProfileCardCol"},"Biostatistics genius from Duke University before joining our team at Illumina. 
Now working as a Research Engineer at Facebook AI Research.")))}y.isMDXComponent=!0},31958:(M,L,t)=>{t.d(L,{Z:()=>i});const i=""},92425:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/SupplementaryAnnotations-d43d3f1c837f9b80fab530432e0e4b1d.svg"},84770:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/TranscriptConsequences-60ca1c43a36dacf896fecdabf09ce02c.svg"}}]); \ No newline at end of file diff --git a/assets/js/80bccc38.2d3db8d2.js b/assets/js/80bccc38.2d3db8d2.js deleted file mode 100644 index fe49fe2b..00000000 --- a/assets/js/80bccc38.2d3db8d2.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1921,6882],{3905:(t,e,a)=>{a.d(e,{Zo:()=>d,kt:()=>g});var n=a(67294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),m=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},d=function(t){var e=m(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",c={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,d=o(t,["components","mdxType","originalType","parentName"]),s=m(a),N=r,g=s["".concat(p,".").concat(N)]||s[N]||c[N]||l;return a?n.createElement(g,i(i({ref:e},d),{},{components:a})):n.createElement(g,i({ref:e},d))}));function g(t,e){var 
a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=N;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[s]="string"==typeof t?t:r,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/fusioncatcher-json",id:"version-3.18/data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/fusioncatcher-json.md",tags:[],version:"3.18",frontMatter:{}},p=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],m={toc:p},d="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA oesophageal carcinomas"\n ]\n }\n 
]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known germline data sources")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data sources")))),(0,r.kt)("h4",{id:"genes"},"genes"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"first"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"second"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are 
paralogs for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)")))),(0,r.kt)("h4",{id:"gene"},"gene"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. 
MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}s.isMDXComponent=!0},36598:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>i,metadata:()=>p,toc:()=>m});var n=a(87462),r=(a(67294),a(3905)),l=a(43872);const i={title:"FusionCatcher"},o=void 0,p={unversionedId:"data-sources/fusioncatcher",id:"version-3.18/data-sources/fusioncatcher",title:"FusionCatcher",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/fusioncatcher.mdx",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/fusioncatcher",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/fusioncatcher.mdx",tags:[],version:"3.18",frontMatter:{title:"FusionCatcher"},sidebar:"docs",previous:{title:"DECIPHER",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/decipher"},next:{title:"GERP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gerp"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Supported Data Sources",id:"supported-data-sources",children:[{value:"Oncogenes",id:"oncogenes",children:[],level:3},{value:"Germline",id:"germline",children:[],level:3},{value:"Somatic",id:"somatic",children:[],level:3}],level:2},{value:"Gene Pair TSV File",id:"gene-pair-tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Gene TSV File",id:"gene-tsv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download 
URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:m},s="wrapper";function c(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://github.com/ndaniel/fusioncatcher"},"FusionCatcher")," is a well-known tool that searches for somatic novel/known fusion genes, translocations, and/or chimeras in RNA-seq data. While FusionCatcher itself is not part of Nirvana, we have included a subset of their genomic databases in Nirvana."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Daniel Nicorici, Mihaela \u015eatalan, Henrik Edgren, Sara Kangaspeska, Astrid Murum\xe4gi, Olli Kallioniemi, Sami Virtanen, Olavi Kilkku. (2014) ",(0,r.kt)("a",{parentName:"p",href:"https://www.biorxiv.org/content/10.1101/011650v1"},"FusionCatcher \u2013 a tool for finding somatic fusion genes in paired-end RNA-sequencing data"),". 
",(0,r.kt)("em",{parentName:"p"},"bioRxiv")," 011650"))),(0,r.kt)("h2",{id:"supported-data-sources"},"Supported Data Sources"),(0,r.kt)("h3",{id:"oncogenes"},"Oncogenes"),(0,r.kt)("p",null,"The following data sources are aggregated and used to populate the ",(0,r.kt)("inlineCode",{parentName:"p"},"isOncogene")," field in the gene JSON object:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bushman"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.bushmanlab.org/links/genelists"},"bushmanlab.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cancer_genes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ONGENE"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S1673852716302053"},"JGG")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://ongene.bioinfo-minzhao.org"},"bioinfo-minzhao.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"oncogenes_more.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"UniProt tumor 
genes"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/49/D1/D480/6006196"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.uniprot.org/downloads"},"uniprot.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tumor_genes.txt")))),(0,r.kt)("h3",{id:"germline"},"Germline"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Nirvana label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"1000 Genomes Project"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0104567"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"1000genomes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy (strong support)"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"banned.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Illumina Body Map 
2.0"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-513"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"bodymap2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CACG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S0888754312000821"},"Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"cacg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ConjoinG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0013284"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"conjoing.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy prefrontal cortex"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcmedgenomics.biomedcentral.com/articles/10.1186/s12920-016-0164-y"},"BMC Medical Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE68719"},"NCBI GEO")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cortex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Duplicated Genes Database"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0050653"},"PLOS 
ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://dgd.genouest.org/"},"genouest.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"dgd.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"GTEx healthy tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://gtexportal.org/home/"},"gtexportal.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"gtex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"healthy.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Human Protein Atlas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.mcponline.org/article/S1535-9476(20)34633-8/fulltext"},"MCP")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-1733/"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"hpa.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Babiceanu non-cancer tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-cancer_tissues.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor cell 
lines"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor_cells.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions normal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-normal.txt")))),(0,r.kt)("h3",{id:"somatic"},"Somatic"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Nirvana label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Alaei-Mahabadi 18 cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.pnas.org/content/113/48/13768.long"},"PNAS")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"18cancers.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"DepMap CCLE"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://depmap.org/portal/download/"},"depmap.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE Klijn"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080"},"Nature 
Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080#Sec27"},"Nature Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Cancer Genome Project"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cgp.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerKB 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4kb.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerPub 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4pub.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerSeq 
4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4seq.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"COSMIC"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cosmic.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bao gliomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genome.cshlp.org/content/24/11/1765"},"Genome Research")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"gliomas.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Known"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"known.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Mitelman DB"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://mitelmandatabase.isb-cgc.org"},"ISB-CGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://storage.cloud.google.com/mitelman-data-files/prod/mitelman_db.zip"},"Google Cloud")),(0,r.kt)("td",{parentName:"tr",align:"left"},"mitelman.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA oesophageal 
carcinomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature20805"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"oesophagus.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bailey pancreatic cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965#Sec44"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pancreases.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"PCAWG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2018.03.042"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://dcc.icgc.org/releases/PCAWG/transcriptome/fusion"},"ICGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pcawg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Robinson prostate 
cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2015.05.001"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell/fulltext/S0092-8674(15)00548-6?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0092867415005486%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"prostate_cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cancer.gov/about-nci/organization/ccg/research/structural-genomics/tcga"},"cancer.gov")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions tumor"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA Gao"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.celrep.2018.03.050"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell-reports/fulltext/S2211-1247(18)30395-4?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS2211124718303954%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA 
Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TICdb"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-8-33"},"BMC Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genetica.unav.edu/TICdb/allseqs_TICdb.txt"},"unav.edu")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ticdb.txt")))),(0,r.kt)("h2",{id:"gene-pair-tsv-file"},"Gene Pair TSV File"),(0,r.kt)("p",null,"Most of the data files in FusionCatcher are two-column TSV files containing the Ensembl gene IDs that are paired together."),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the 1000genomes.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000006210 ENSG00000102962\nENSG00000006652 ENSG00000181016\nENSG00000014138 ENSG00000149798\nENSG00000026297 ENSG00000071242\nENSG00000035499 ENSG00000155959\nENSG00000055211 ENSG00000131013\nENSG00000055332 ENSG00000179915\nENSG00000062485 ENSG00000257727\nENSG00000065978 ENSG00000166501\nENSG00000066044 ENSG00000104980\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"In Nirvana, we will only import a gene pair if both Ensembl gene IDs are recognized from either our GRCh37 or GRCh38 cache files."),(0,r.kt)("h2",{id:"gene-tsv-file"},"Gene TSV File"),(0,r.kt)("p",null,"Some of the data files are single-column files containing Ensembl gene IDs. 
This is commonly used in the data files representing oncogene data sources."),(0,r.kt)("h3",{id:"example-1"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the oncogenes_more.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000000938\nENSG00000003402\nENSG00000005469\nENSG00000005884\nENSG00000006128\nENSG00000006453\nENSG00000006468\nENSG00000007350\nENSG00000008294\nENSG00000008952\n")),(0,r.kt)("h3",{id:"parsing-1"},"Parsing"),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"FusionCatcher also uses creates custom Ensembl genes (e.g. ",(0,r.kt)("inlineCode",{parentName:"p"},"ENSG09000000002"),") to handle missing Ensembl genes. 
Nirvana will ignore these entries since we only include the gene IDs that are currently recognized by Nirvana."),(0,r.kt)("p",{parentName:"div"},"I suspect that these were originally RefSeq genes and if so, we can support those directly in Nirvana in the future."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sourceforge.net/projects/fusioncatcher/files/data"},"https://sourceforge.net/projects/fusioncatcher/files/data")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSON"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/82222263.c45cd9d1.js b/assets/js/82222263.c45cd9d1.js deleted file mode 100644 index 9c707672..00000000 --- a/assets/js/82222263.c45cd9d1.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3177,104],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=d(n),u=r,v=p["".concat(s,".").concat(u)]||p[u]||m[u]||o;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/topmed-json",id:"version-3.16/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/topmed-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],d={toc:s},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}p.isMDXComponent=!0},10539:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(42472);const i={title:"TOPMed"},l=void 
0,s={unversionedId:"data-sources/topmed",id:"version-3.16/data-sources/topmed",title:"TOPMed",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/topmed.mdx",sourceDirName:"data-sources",slug:"/data-sources/topmed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/topmed",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/topmed.mdx",tags:[],version:"3.16",frontMatter:{title:"TOPMed"},sidebar:"version-3.16/docs",previous:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/splice-ai"},next:{title:"Nirvana JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/file-formats/nirvana-json-file-format"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF extraction",id:"vcf-extraction",children:[],level:2},{value:"GRCh37 liftover",id:"grch37-liftover",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON output",id:"json-output",children:[],level:2}],c={toc:d},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"https://www.nhlbi.nih.gov/science/trans-omics-precision-medicine-topmed-program"},"Trans-Omics for Precision Medicine")," (TOPMed) program, sponsored by the National Institutes of Health (NIH) National Heart, Lung and Blood Institute (NHLBI), is part of a broader Precision Medicine Initiative, which aims to provide disease treatments tailored to an individual\u2019s unique genes and environment. 
TOPMed contributes to this Initiative through the integration of whole-genome sequencing (WGS) and other omics (e.g., metabolic profiles, epigenomics, protein and RNA expression patterns) data with molecular, behavioral, imaging, environmental, and clinical data."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Kowalski, M.H., Qian, H., Hou, Z., Rosen, J.D., Tapia, A.L., Shan, Y., Jain, D., Argos, M., Arnett, D.K., Avery, C. and Barnes, K.C., 2019. Use of> 100,000 NHLBI Trans-Omics for Precision Medicine (TOPMed) Consortium whole genome sequences improves imputation quality and detection of rare variant associations in admixed African and Hispanic/Latino populations. 
",(0,r.kt)("em",{parentName:"p"},"PLoS genetics"),", ",(0,r.kt)("strong",{parentName:"p"},"15(12)"),", p.e1008500."))),(0,r.kt)("h2",{id:"vcf-extraction"},"VCF extraction"),(0,r.kt)("p",null,"We currently extract the following fields from TOPMed VCF file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,r.kt)("p",null,"Example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 10132 TOPMed_freeze_5?chr1:10,132 T C 255 SVM VRT=1;NS=62784;AN=125568;AC=32;AF=0.000254842;Het=32;Hom=0 NA:FRQ 125568:0.000254842\n")),(0,r.kt)("h2",{id:"grch37-liftover"},"GRCh37 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh37 on TOPMed website. We performed a liftover from GRCh38 to GRCh37 using dbSNP ids."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://bravo.sph.umich.edu/freeze5/hg38/download"},"https://bravo.sph.umich.edu/freeze5/hg38/download")),(0,r.kt)("h2",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/826b8b6c.7ec0cae5.js b/assets/js/826b8b6c.7ec0cae5.js deleted file mode 100644 index 024abca3..00000000 --- a/assets/js/826b8b6c.7ec0cae5.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8102,2883,7751],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>N});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var 
l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},u=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},c=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,p=e.parentName,u=o(e,["components","mdxType","originalType","parentName"]),m=s(n),c=r,N=m["".concat(p,".").concat(c)]||m[c]||d[c]||l;return n?a.createElement(N,i(i({ref:t},u),{},{components:n})):a.createElement(N,i({ref:t},u))}));function N(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,i=new Array(l);i[0]=c;var o={};for(var p in t)hasOwnProperty.call(t,p)&&(o[p]=t[p]);o.originalType=e,o[m]="string"==typeof e?e:r,i[1]=o;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.14/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n 
"afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}m.isMDXComponent=!0},97118:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.14/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}m.isMDXComponent=!0},1621:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>p,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>u});var a=n(87462),r=(n(67294),n(3905)),l=n(49082),i=n(97118);const o={title:"1000 Genomes"},p=void 0,s={unversionedId:"data-sources/1000Genomes",id:"version-3.14/data-sources/1000Genomes",title:"1000 Genomes",description:"Overview",source:"@site/versioned_docs/version-3.14/data-sources/1000Genomes.mdx",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/1000Genomes",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/1000Genomes.mdx",tags:[],version:"3.14",frontMatter:{title:"1000 Genomes"},sidebar:"version-3.14/docs",previous:{title:"Annotating COVID-19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/covid19"},next:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/clinvar"}},u=[{value:"Overview",id:"overview",children:[],level:2},{value:"Populations",id:"populations",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing",children:[{value:"Conflict Resolution",id:"conflict-resolution",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Structural Variants",id:"structural-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing-1",children:[],level:3},{value:"Converting VCF svTypes to SO sequence 
alterations",id:"converting-vcf-svtypes-to-so-sequence-alterations",children:[{value:"Exceptions",id:"exceptions",children:[],level:4}],level:3}],level:2},{value:"JSON Output",id:"json-output-1",children:[],level:2}],m={toc:u},d="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. ",(0,r.kt)("em",{parentName:"p"},"Nature 526"),", 75\u201381 (2015). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/nature15394"},"https://doi.org/10.1038/nature15394")))),(0,r.kt)("h2",{id:"populations"},"Populations"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"The super population membership can be found here: (",(0,r.kt)("a",{parentName:"li",href:"http://www.1000genomes.org/category/population/"},"http://www.1000genomes.org/category/population/"),")"),(0,r.kt)("li",{parentName:"ul"},"We want to capture the allele frequencies for all 26 populations as well as the 5 super populations and the total population.")),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing"},"VCF File Parsing"),(0,r.kt)("p",null,"The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. 
Our team converted the original data to VCF entries with allele counts and allele numbers like the following."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633\n")),(0,r.kt)("p",null,"The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored)."),(0,r.kt)("p",null,"We parse the VCF file and extract the following fields from INFO:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"AA"),(0,r.kt)("li",{parentName:"ul"},"AC"),(0,r.kt)("li",{parentName:"ul"},"AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AN"),(0,r.kt)("li",{parentName:"ul"},"AMR_AN"),(0,r.kt)("li",{parentName:"ul"},"AFR_AN"),(0,r.kt)("li",{parentName:"ul"},"EUR_AN"),(0,r.kt)("li",{parentName:"ul"},"SAS_AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AC"),(0,r.kt)("li",{parentName:"ul"},"AMR_AC"),(0,r.kt)("li",{parentName:"ul"},"AFR_AC"),(0,r.kt)("li",{parentName:"ul"},"EUR_AC"),(0,r.kt)("li",{parentName:"ul"},"SAS_AC")),(0,r.kt)("h4",{id:"conflict-resolution"},"Conflict Resolution"),(0,r.kt)("p",null,"We have observed conflicting allele frequency information in the source. Take the following example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;\n1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;\n")),(0,r.kt)("p",null,"That is, the variant 1-20505705-C-CTG has conflicting entries. 
To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Chromosome"),(0,r.kt)("th",{parentName:"tr",align:"left"},"#"," of alleles"),(0,r.kt)("th",{parentName:"tr",align:"center"},"#"," of conflicting alleles"),(0,r.kt)("th",{parentName:"tr",align:"left"},"percentage"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"chrX"),(0,r.kt)("td",{parentName:"tr",align:"left"},"834800"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2733"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.33%")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"Total"),(0,r.kt)("td",{parentName:"tr",align:"left"},"21413098"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2743"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.013%")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Currently"),", we removed the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep allele frequencies of all other alleles in the VCF line."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Potential Alternate Solutions")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Remove all alleles that are contained in the vcf lines which have conflicting allele. 
(Recommended by 1000 genome group Holly Zheng-Bradley, 7/29/2015)"),(0,r.kt)("li",{parentName:"ul"},"Recalculate the allele frequency for the conflicting allele."),(0,r.kt)("li",{parentName:"ul"},"Pick the allele frequency that has the highest data support.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/"},"GRCh37"),"\n",(0,r.kt)("a",{parentName:"p",href:"http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/"},"GRCh38")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSONSNV"}),(0,r.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing-1"},"VCF File Parsing"),(0,r.kt)("p",null,"The VCF files contain entries like the following:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A ,,, 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4\n")),(0,r.kt)("p",null,"Please note that, CNVs are allele-specific. 
For example, HG00096 is effectively copy number 4, which would be a net gain on chr22."),(0,r.kt)("p",null,"1000 Genomes contains 5 types of structural variants:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CNV"),(0,r.kt)("li",{parentName:"ul"},"DEL"),(0,r.kt)("li",{parentName:"ul"},"DUP"),(0,r.kt)("li",{parentName:"ul"},"INS"),(0,r.kt)("li",{parentName:"ul"},"INV")),(0,r.kt)("p",null,"Since data of 1000 genomes is provided in VCF format, we assume that the coordinates follow the vcf format, i.e., there is a padding base for symbolic alleles. So all the interval can be interpreted as ","[BEGIN+1, END]",".\nSimilarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Insertion issues")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"END = BEGIN for 6/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+2 for 93/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+3 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+4 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END \u2013 BEGIN range from 5 to 1156 for others.")),(0,r.kt)("h3",{id:"converting-vcf-svtypes-to-so-sequence-alterations"},"Converting VCF svTypes to SO sequence alterations"),(0,r.kt)("p",null,"The svType will be captured in our JSON file under the ",(0,r.kt)("a",{parentName:"p",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"sequenceAlteration")," key. 
Here's the translation we'll use according to svType in 1000 Genomes."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"svType"),(0,r.kt)("th",{parentName:"tr",align:null},"Alternative Alleles contain "),(0,r.kt)("th",{parentName:"tr",align:null},"sequenceAlteration"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"ALU"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DUP"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"CNV"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain (observed_gains >0 and observed_losses =0) ",(0,r.kt)("br",null),"copy_number_loss\xa0(observed_gains = 0 and observed_losses > 0) ",(0,r.kt)("br",null),"copy_number_variation 
(otherwise)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DEL"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_loss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"LINE1"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"SVA"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INV"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"inversion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INS"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"insertion")))),(0,r.kt)("h4",{id:"exceptions"},"Exceptions"),(0,r.kt)("p",null,(0,r.kt)("em",{parentName:"p"},"We discard structural variants without END")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n21 9495848 esv3646347 A 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0\n")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"CNVs in chrY")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"No other types of structural variants exist in chrY"),(0,r.kt)("li",{parentName:"ul"},'Since copy number is provided in genotype field, we directly parse the copy number from "CN" field.'),(0,r.kt)("li",{parentName:"ul"},"For most CNVs in chrY, the reference copy 
number is 1, but the refence number for CNVs in segmental duplication sites is 2 ("," in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG00105 HG00107 HG00108\nY 2888555 CNV_Y_2888555_3014661 T 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394\nY 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C , 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99\n")),(0,r.kt)("h2",{id:"json-output-1"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/82e726f2.b23177a6.js b/assets/js/82e726f2.b23177a6.js deleted file mode 100644 index 27f6cb33..00000000 --- a/assets/js/82e726f2.b23177a6.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[12],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>g});var a=n(67294);function r(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),u=c(n),m=r,g=u["".concat(s,".").concat(m)]||u[m]||d[m]||l;return n?a.createElement(g,i(i({ref:t},p),{},{components:n})):a.createElement(g,i({ref:t},p))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,i=new Array(l);i[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[u]="string"==typeof e?e:r,i[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-gene-validity-json.md",tags:[],version:"current",frontMatter:{}},s=[],c={toc:s},p="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease 
label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted"),(0,r.kt)("li",{parentName:"ul"},"no known disease relationship")))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/82e726f2.d75f2330.js b/assets/js/82e726f2.d75f2330.js new file mode 100644 index 00000000..62b0fb40 --- /dev/null +++ b/assets/js/82e726f2.d75f2330.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[12],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>g});var a=n(7294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof 
e?e(t):i(i({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),u=c(n),m=r,g=u["".concat(s,".").concat(m)]||u[m]||d[m]||l;return n?a.createElement(g,i(i({ref:t},p),{},{components:n})):a.createElement(g,i({ref:t},p))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,i=new Array(l);i[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[u]="string"==typeof e?e:r,i[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(7462),r=(n(7294),n(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-gene-validity-json.md",tags:[],version:"current",frontMatter:{}},s=[],c={toc:s},p="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous 
syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted"),(0,r.kt)("li",{parentName:"ul"},"no known disease relationship")))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git 
a/assets/js/833bd66e.2d5e9bc5.js b/assets/js/833bd66e.2d5e9bc5.js deleted file mode 100644 index e85362d5..00000000 --- a/assets/js/833bd66e.2d5e9bc5.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9082],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),c=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},u=function(e){var t=c(e.components);return r.createElement(p.Provider,{value:t},e.children)},s="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,p=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),s=c(n),m=a,f=s["".concat(p,".").concat(m)]||s[m]||d[m]||o;return n?r.createElement(f,l(l({ref:t},u),{},{components:n})):r.createElement(f,l({ref:t},u))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=m;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[s]="string"==typeof e?e:a,l[1]=i;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>s,frontMatter:()=>o,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 
0,i={unversionedId:"data-sources/gnomad-lof-json",id:"data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-lof-json.md",tags:[],version:"current",frontMatter:{}},p=[],c={toc:p},u="wrapper";function s(e){let{components:t,...n}=e;return(0,a.kt)(u,(0,r.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pLi"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pNull"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pRec"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like 
recessive genes, observed ~ 0.5*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"synZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"misZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9fc4e4f9.69915267.js b/assets/js/833bd66e.c612355b.js similarity index 68% rename from assets/js/9fc4e4f9.69915267.js rename to assets/js/833bd66e.c612355b.js index 87e17db5..40557936 100644 --- a/assets/js/9fc4e4f9.69915267.js +++ b/assets/js/833bd66e.c612355b.js @@ -1 +1 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[882],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),c=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=c(e.components);return 
r.createElement(p.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,p=e.parentName,s=i(e,["components","mdxType","originalType","parentName"]),u=c(n),m=a,f=u["".concat(p,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=m;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[u]="string"==typeof e?e:a,l[1]=i;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,i={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.16/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/gnomad-lof-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],c={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n 
"loeuf":1.13e0\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pLi"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pNull"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pRec"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"synZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"misZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}u.isMDXComponent=!0}}]); \ No newline at end of file +"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9082],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>f});var r=n(7294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),c=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},u=function(e){var t=c(e.components);return r.createElement(p.Provider,{value:t},e.children)},s="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,p=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),s=c(n),m=a,f=s["".concat(p,".").concat(m)]||s[m]||d[m]||o;return n?r.createElement(f,l(l({ref:t},u),{},{components:n})):r.createElement(f,l({ref:t},u))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=m;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[s]="string"==typeof e?e:a,l[1]=i;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>s,frontMatter:()=>o,metadata:()=>i,toc:()=>p});var r=n(7462),a=(n(7294),n(3905));const o={},l=void 0,i={unversionedId:"data-sources/gnomad-lof-json",id:"data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-lof-json.md",tags:[],version:"current",frontMatter:{}},p=[],c={toc:p},u="wrapper";function s(e){let{components:t,...n}=e;return(0,a.kt)(u,(0,r.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pLi"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pNull"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pRec"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 
0.5*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"synZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"misZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/83570804.a15ce0f0.js b/assets/js/83570804.a15ce0f0.js deleted file mode 100644 index d822d34e..00000000 --- a/assets/js/83570804.a15ce0f0.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9351],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>g});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return 
a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),p=c(n),m=r,g=p["".concat(s,".").concat(m)]||p[m]||u[m]||i;return n?a.createElement(g,l(l({ref:t},d),{},{components:n})):a.createElement(g,l({ref:t},d))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,l=new Array(i);l[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[p]="string"==typeof e?e:r,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},l=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.18/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],c={toc:s},d="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted"),(0,r.kt)("li",{parentName:"ul"},"no known disease relationship")))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/835b62b6.be7e60a5.js b/assets/js/835b62b6.be7e60a5.js deleted file mode 100644 
index e67bb523..00000000 --- a/assets/js/835b62b6.be7e60a5.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6097],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>m});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=c(n),h=r,m=d["".concat(s,".").concat(h)]||d[h]||u[h]||i;return n?a.createElement(m,o(o({ref:t},p),{},{components:n})):a.createElement(m,o({ref:t},p))}));function m(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={title:"Canonical Transcripts"},o=void 0,l={unversionedId:"core-functionality/canonical-transcripts",id:"version-3.21/core-functionality/canonical-transcripts",title:"Canonical 
Transcripts",description:"Overview",source:"@site/versioned_docs/version-3.21/core-functionality/canonical-transcripts.md",sourceDirName:"core-functionality",slug:"/core-functionality/canonical-transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/canonical-transcripts",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/core-functionality/canonical-transcripts.md",tags:[],version:"3.21",frontMatter:{title:"Canonical Transcripts"},sidebar:"docs",previous:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/file-formats/custom-annotations"},next:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/gene-fusions"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Known Algorithms",id:"known-algorithms",children:[{value:"UCSC",id:"ucsc",children:[],level:3},{value:"Ensembl",id:"ensembl",children:[],level:3},{value:"ACMG",id:"acmg",children:[],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3}],level:2},{value:"Unified Approach",id:"unified-approach",children:[],level:2}],c={toc:s},p="wrapper";function d(e){let{components:t,...i}=e;return(0,r.kt)(p,(0,a.Z)({},c,i,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"One of the more polarizing topics within annotation is the notion of canonical transcripts. Because of alternative splicing, we often have several transcripts for each gene. In the human genome, there are an average of 3.4 transcripts per gene (Tung, 2020). 
As scientists, we seem to have a need for identifying a representative example of a gene - even if there's no biological basis for the motivation."),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(7057).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Golden Helix Blog")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"A few years ago, the guys over at Golden Helix wrote an excellent post about the pitfalls and issues surrounding the identification of canonical transcripts: ",(0,r.kt)("a",{parentName:"p",href:"https://blog.goldenhelix.com/whats-in-a-name-the-intricacies-of-identifying-variants/"},"What\u2019s in a Name: The Intricacies of Identifying Variants"),"."))),(0,r.kt)("p",null,"In Nirvana, we wanted to identify an algorithm for determining the canonical transcript and apply it consistently to all of our transcript data sources."),(0,r.kt)("h2",{id:"known-algorithms"},"Known Algorithms"),(0,r.kt)("h3",{id:"ucsc"},"UCSC"),(0,r.kt)("p",null,"UCSC publishes a list of canonical transcripts in its ",(0,r.kt)("inlineCode",{parentName:"p"},"knownCanonical")," table which is available via the ",(0,r.kt)("a",{parentName:"p",href:"https://genome.ucsc.edu/cgi-bin/hgTables"},"TableBrowser"),". 
Of the RefSeq data sources, it was the only one we could find that provided canonical transcripts:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is defined as either the longest CDS, if the gene has translated transcripts, or the longest cDNA.")),(0,r.kt)("p",null,"If you were to implement this and compare it with the knownCanonical table, you would see a lot of exceptions to the rule."),(0,r.kt)("h3",{id:"ensembl"},"Ensembl"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"http://uswest.ensembl.org/Help/Glossary"},"Ensembl glossary")," states:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is used in the gene tree analysis in Ensembl and does not necessarily reflect the most biologically relevant transcript of a gene. For human, the canonical transcript for a gene is set according to the following hierarchy:"),(0,r.kt)("ol",{parentName:"blockquote"},(0,r.kt)("li",{parentName:"ol"},"Longest CCDS translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (1), choose the longest Ensembl/Havana merged translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (2), choose the longest translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no translation, choose the longest non-protein-coding transcript."))),(0,r.kt)("h3",{id:"acmg"},"ACMG"),(0,r.kt)("p",null,"From the ACMG Guidelines for the Interpretation of Sequence Variants:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"A reference transcript for each gene should be used and provided in the report when describing coding variants. 
The transcript should represent either the longest known transcript and/or the most clinically relevant transcript.")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)("p",null,"From the ClinVar paper:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"When there are multiple transcripts for a gene, ClinVar selects one HGVS expression to construct a preferred name. By default, this selection is based on the first reference standard transcript identified by the RefSeqGene/LRG (Locus Reference Genomic) collaboration.")),(0,r.kt)("h2",{id:"unified-approach"},"Unified Approach"),(0,r.kt)("p",null,"Our approach is almost identical to the one Golden Helix discussed in their article:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"If we're looking at RefSeq, only consider NM & NR transcripts as candidates for canonical transcripts."),(0,r.kt)("li",{parentName:"ol"},"Sort the transcripts in the following order:",(0,r.kt)("ol",{parentName:"li"},(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://www.lrg-sequence.org/"},"Locus Reference Genomic (LRG)")," entries occur before non-LRG entries"),(0,r.kt)("li",{parentName:"ol"},"Descending CDS length"),(0,r.kt)("li",{parentName:"ol"},"Descending transcript length"),(0,r.kt)("li",{parentName:"ol"},"Ascending accession number"))),(0,r.kt)("li",{parentName:"ol"},"Grab the first entry")))}d.isMDXComponent=!0},7057:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/hk1-transcripts-a5b85474d3b002553687715dbd004907.png"}}]); \ No newline at end of file diff --git a/assets/js/83cc9f41.5eec2f67.js b/assets/js/83cc9f41.5eec2f67.js deleted file mode 100644 index 0871e8ab..00000000 --- a/assets/js/83cc9f41.5eec2f67.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[380],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>h});var a=n(67294);function i(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),u=c(n),m=i,h=u["".concat(s,".").concat(m)]||u[m]||p[m]||r;return n?a.createElement(h,o(o({ref:t},d),{},{components:n})):a.createElement(h,o({ref:t},d))}));function h(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:i,o[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={title:"Getting Started"},o=void 0,l={unversionedId:"introduction/getting-started",id:"version-3.14/introduction/getting-started",title:"Getting Started",description:"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.",source:"@site/versioned_docs/version-3.14/introduction/getting-started.md",sourceDirName:"introduction",slug:"/introduction/getting-started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/getting-started",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/introduction/getting-started.md",tags:[],version:"3.14",frontMatter:{title:"Getting Started"},sidebar:"version-3.14/docs",previous:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/dependencies"},next:{title:"Annotating COVID-19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/covid19"}},s=[{value:"Quick Start",id:"quick-start",children:[],level:2},{value:"Getting Nirvana",id:"getting-nirvana",children:[{value:"Compile from Source",id:"compile-from-source",children:[],level:3},{value:"GitHub Release Notes",id:"github-release-notes",children:[],level:3},{value:"Docker",id:"docker",children:[],level:3}],level:2},{value:"Downloading the data files",id:"downloading-the-data-files",children:[],level:2},{value:"Download a test VCF file",id:"download-a-test-vcf-file",children:[],level:2},{value:"Running Nirvana",id:"running-nirvana",children:[],level:2}],c={toc:s},d="wrapper";function u(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"Nirvana is written in C# using ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core")," (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files."),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana currently uses .NET Core 2.1 or later. 
Please make sure that you have the most current runtime from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core downloads")," page."))),(0,i.kt)("h2",{id:"quick-start"},"Quick Start"),(0,i.kt)("p",null,"If you want to get started right away, we've created ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh"},"a script")," that downloads Nirvana, compiles it, and starts annotating a test file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh\nsh ./TestNirvana.sh\n")),(0,i.kt)("p",null,"We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X."),(0,i.kt)("h2",{id:"getting-nirvana"},"Getting Nirvana"),(0,i.kt)("h3",{id:"compile-from-source"},"Compile from Source"),(0,i.kt)("p",null,"The following will grab the latest version of Nirvana from GitHub and compile it using the .NET Core compiler:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"git clone https://github.com/Illumina/Nirvana.git\ncd Nirvana\ndotnet build -c Release\n")),(0,i.kt)("h3",{id:"github-release-notes"},"GitHub Release Notes"),(0,i.kt)("p",null,"Alternatively, you can grab the latest binaries from our ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/releases"},"GitHub Releases")," page:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p Nirvana/Data\ncd Nirvana\nunzip Nirvana-3.12.0-dotnet-2.1.0.zip\n")),(0,i.kt)("h3",{id:"docker"},"Docker"),(0,i.kt)("p",null,"You can find us on ",(0,i.kt)("a",{parentName:"p",href:"https://hub.docker.com/repository/docker/annotation/nirvana"},"Docker Hub")," under ",(0,i.kt)("inlineCode",{parentName:"p"},"annotation/nirvana"),":"),(0,i.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"We think Docker is fantastic. However, because our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Nirvana in Docker."))),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p Nirvana/Data\ncd Nirvana\ndocker pull annotation/nirvana:3.9.1\n")),(0,i.kt)("p",null,"For Docker, we have special instructions for running the Downloader:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.9.1 dotnet \\\n /opt/nirvana/Downloader.dll --ga GRCh37 -o /scratch\n")),(0,i.kt)("p",null,"Similarly, we have special instructions for running Nirvana (Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF")," in case you need it):"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.9.1 dotnet \\\n /opt/nirvana/Nirvana.dll -c /scratch/Cache/GRCh37/Both \\\n -r /scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n --sd /scratch/SupplementaryAnnotation/GRCh37 \\\n -i /scratch/HiSeq.10000.vcf.gz -o 
/scratch/HiSeq\n")),(0,i.kt)("h2",{id:"downloading-the-data-files"},"Downloading the data files"),(0,i.kt)("p",null,"To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/Downloader.dll \\\n --ga GRCh37 \\\n -o Data\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--ga")," argument specifies the genome assembly which can be ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh37"),", ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh38"),", or ",(0,i.kt)("inlineCode",{parentName:"li"},"both"),"."),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Glitches in the Matrix")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. 
If you see that a file was marked ",(0,i.kt)("inlineCode",{parentName:"p"},"truncated"),", try fixing the root cause and running the downloader again."))),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"From time to time, you can re-run the Downloader to get the latest annotation files. 
It will only download the files that changed."))),(0,i.kt)("h2",{id:"download-a-test-vcf-file"},"Download a test VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz\n")),(0,i.kt)("h2",{id:"running-nirvana"},"Running Nirvana"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/Nirvana.dll \\\n -c Data/Cache/GRCh37/Both \\\n --sd Data/SupplementaryAnnotation/GRCh37 \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i HiSeq.10000.vcf.gz \\\n -o HiSeq.10000\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache prefix"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, 
Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.8\nSA Position Scan 00:00:00.7 12902\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr1 00:00:02.3 00:00:04.5 2176\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:02.6 16.5 %\nPreload 00:00:02.3 15.2 %\nAnnotation 00:00:04.5 29.0 %\n\nTime: 00:00:14.7\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"HiSeq.10000.json.gz"),". Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.json.gz"},"the full JSON file"),"."))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/85047af6.45f9cdd5.js b/assets/js/85047af6.45f9cdd5.js new file mode 100644 index 00000000..7c217533 --- /dev/null +++ b/assets/js/85047af6.45f9cdd5.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7860],{3905:(_,E,t)=>{t.d(E,{Zo:()=>N,kt:()=>o});var e=t(7294);function A(_,E,t){return E in _?Object.defineProperty(_,E,{value:t,enumerable:!0,configurable:!0,writable:!0}):_[E]=t,_}function n(_,E){var t=Object.keys(_);if(Object.getOwnPropertySymbols){var e=Object.getOwnPropertySymbols(_);E&&(e=e.filter((function(E){return Object.getOwnPropertyDescriptor(_,E).enumerable}))),t.push.apply(t,e)}return t}function a(_){for(var E=1;E=0||(A[t]=_[t]);return A}(_,E);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(_);for(e=0;e=0||Object.prototype.propertyIsEnumerable.call(_,t)&&(A[t]=_[t])}return A}var l=e.createContext({}),M=function(_){var E=e.useContext(l),t=E;return _&&(t="function"==typeof 
_?_(E):a(a({},E),_)),t},N=function(_){var E=M(_.components);return e.createElement(l.Provider,{value:E},_.children)},R="mdxType",i={inlineCode:"code",wrapper:function(_){var E=_.children;return e.createElement(e.Fragment,{},E)}},F=e.forwardRef((function(_,E){var t=_.components,A=_.mdxType,n=_.originalType,l=_.parentName,N=r(_,["components","mdxType","originalType","parentName"]),R=M(t),F=A,o=R["".concat(l,".").concat(F)]||R[F]||i[F]||n;return t?e.createElement(o,a(a({ref:E},N),{},{components:t})):e.createElement(o,a({ref:E},N))}));function o(_,E){var t=arguments,A=E&&E.mdxType;if("string"==typeof _||A){var n=t.length,a=new Array(n);a[0]=F;var r={};for(var l in E)hasOwnProperty.call(E,l)&&(r[l]=E[l]);r.originalType=_,r[R]="string"==typeof _?_:A,a[1]=r;for(var M=2;M{t.r(E),t.d(E,{contentTitle:()=>a,default:()=>R,frontMatter:()=>n,metadata:()=>r,toc:()=>l});var e=t(7462),A=(t(7294),t(3905));const n={},a=void 0,r={unversionedId:"data-sources/gnomad-structural-variants-data_description",id:"data-sources/gnomad-structural-variants-data_description",title:"gnomad-structural-variants-data_description",description:"Bed Example",source:"@site/docs/data-sources/gnomad-structural-variants-data_description.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-data_description",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-data_description",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-structural-variants-data_description.md",tags:[],version:"current",frontMatter:{}},l=[{value:"Bed Example",id:"bed-example",children:[],level:4},{value:"TSV Example",id:"tsv-example",children:[],level:4},{value:"Structural Variant Type Mapping",id:"structural-variant-type-mapping",children:[],level:4}],M={toc:l},N="wrapper";function 
R(_){let{components:E,...t}=_;return(0,A.kt)(N,(0,e.Z)({},M,t,{components:E,mdxType:"MDXLayout"}),(0,A.kt)("h4",{id:"bed-example"},"Bed Example"),(0,A.kt)("p",null,"The bed file was obtained from original source for GRCh37"),(0,A.kt)("pre",null,(0,A.kt)("code",{parentName:"pre",className:"language-scss"},"#chrom start end name svtype ALGORITHMS BOTHSIDES_SUPPORT CHR2 CPX_INTERVALS CPX_TYPE END2 ENDEVIDENCE HIGH_SR_BACKGROUND PCRPLUS_DEPLETED PESR_GT_OVERDISPERSION POS2 PROTEIN_CODING__COPY_GAIN PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC PROTEIN_CODING__INTRONIC PROTEIN_CODING__INV_SPAN PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER PROTEIN_CODING__UTR SOURCE STRANDS SVLEN SVTYPE UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN AC AF N_BI_GENOS N_HOMREF N_HET N_HOMALT FREQ_HOMREF FREQ_HET FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF MALE_N_HET MALE_N_HOMALT MALE_FREQ_HOMREF MALE_FREQ_HET MALE_FREQ_HOMALT MALE_N_HEMIREF MALE_N_HEMIALT MALE_FREQ_HEMIREF MALE_FREQ_HEMIALT PAR FEMALE_AN FEMALE_AC FEMALE_AF FEMALE_N_BI_GENOS FEMALE_N_HOMREF FEMALE_N_HET FEMALE_N_HOMALT FEMALE_FREQ_HOMREF FEMALE_FREQ_HET FEMALE_FREQ_HOMALT POPMAX_AF AFR_AN AFR_AC AFR_AF AFR_N_BI_GENOS AFR_N_HOMREF AFR_N_HET AFR_N_HOMALT AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF AFR_MALE_N_HET AFR_MALE_N_HOMALT AFR_MALE_FREQ_HOMREF AFR_MALE_FREQ_HET AFR_MALE_FREQ_HOMALT AFR_MALE_N_HEMIREF AFR_MALE_N_HEMIALT AFR_MALE_FREQ_HEMIREF AFR_MALE_FREQ_HEMIALT AFR_FEMALE_AN AFR_FEMALE_AC AFR_FEMALE_AF AFR_FEMALE_N_BI_GENOS AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT AMR_AN AMR_AC AMR_AF AMR_N_BI_GENOS AMR_N_HOMREF AMR_N_HET AMR_N_HOMALT AMR_FREQ_HOMREF AMR_FREQ_HET AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS 
AMR_MALE_N_HOMREF AMR_MALE_N_HET AMR_MALE_N_HOMALT AMR_MALE_FREQ_HOMREF AMR_MALE_FREQ_HET AMR_MALE_FREQ_HOMALT AMR_MALE_N_HEMIREF AMR_MALE_N_HEMIALT AMR_MALE_FREQ_HEMIREF AMR_MALE_FREQ_HEMIALT AMR_FEMALE_AN AMR_FEMALE_AC AMR_FEMALE_AF AMR_FEMALE_N_BI_GENOS AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT EAS_AN EAS_AC EAS_AF EAS_N_BI_GENOS EAS_N_HOMREF EAS_N_HET EAS_N_HOMALT EAS_FREQ_HOMREF EAS_FREQ_HET EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF EAS_MALE_N_HET EAS_MALE_N_HOMALT EAS_MALE_FREQ_HOMREF EAS_MALE_FREQ_HET EAS_MALE_FREQ_HOMALT EAS_MALE_N_HEMIREF EAS_MALE_N_HEMIALT EAS_MALE_FREQ_HEMIREF EAS_MALE_FREQ_HEMIALT EAS_FEMALE_AN EAS_FEMALE_AC EAS_FEMALE_AF EAS_FEMALE_N_BI_GENOS EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT EUR_AN EUR_AC EUR_AF EUR_N_BI_GENOS EUR_N_HOMREF EUR_N_HET EUR_N_HOMALT EUR_FREQ_HOMREF EUR_FREQ_HET EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF EUR_MALE_N_HET EUR_MALE_N_HOMALT EUR_MALE_FREQ_HOMREF EUR_MALE_FREQ_HET EUR_MALE_FREQ_HOMALT EUR_MALE_N_HEMIREF EUR_MALE_N_HEMIALT EUR_MALE_FREQ_HEMIREF EUR_MALE_FREQ_HEMIALT EUR_FEMALE_AN EUR_FEMALE_AC EUR_FEMALE_AF EUR_FEMALE_N_BI_GENOS EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT OTH_AN OTH_AC OTH_AF OTH_N_BI_GENOS OTH_N_HOMREF OTH_N_HET OTH_N_HOMALT OTH_FREQ_HOMREF OTH_FREQ_HET OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF OTH_MALE_N_HET OTH_MALE_N_HOMALT OTH_MALE_FREQ_HOMREF OTH_MALE_FREQ_HET OTH_MALE_FREQ_HOMALT OTH_MALE_N_HEMIREF OTH_MALE_N_HEMIALT OTH_MALE_FREQ_HEMIREF OTH_MALE_FREQ_HEMIALT OTH_FEMALE_AN OTH_FEMALE_AC OTH_FEMALE_AF OTH_FEMALE_N_BI_GENOS OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET OTH_FEMALE_N_HOMALT OTH_FEMALE_FREQ_HOMREF 
OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT FILTER\n1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 
0.017857100814580917 0.0UNRESOLVED \n")),(0,A.kt)("h4",{id:"tsv-example"},"TSV Example"),(0,A.kt)("p",null,"The tsv was obtained from lifted over dataset created by dbVar for GRCh38"),(0,A.kt)("pre",null,(0,A.kt)("code",{parentName:"pre",className:"language-scss"},"#variant_call_accession variant_call_id variant_call_type experiment_id sample_id sampleset_id assembly chrcontig outer_start start inner_start inner_stop stop outer_stop insertion_length variant_region_acc variant_region_id copy_number description validation zygosity origin phenotype hgvs_name placement_method placement_rank placements_per_assembly remap_alignment remap_best_within_cluster remap_coverage remap_diff_chr remap_failure_code allele_count allele_frequency allele_number\nnssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\n")),(0,A.kt)("h4",{id:"structural-variant-type-mapping"},"Structural Variant Type Mapping"),(0,A.kt)("p",null,"The source files represented the structural variants with keys using various naming conventions.\nIn the Illumina Connected Annotations JSON output, these keys will be mapped according to the following. 
"),(0,A.kt)("table",null,(0,A.kt)("thead",{parentName:"table"},(0,A.kt)("tr",{parentName:"thead"},(0,A.kt)("th",{parentName:"tr",align:null},"Illumina Connected Annotations JSON SV Type Key"),(0,A.kt)("th",{parentName:"tr",align:null},"GRCh37 Source SV Type Key"),(0,A.kt)("th",{parentName:"tr",align:null},"GRCh38 Source SV Type Key"))),(0,A.kt)("tbody",{parentName:"table"},(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"copy_number_variation"),(0,A.kt)("td",{parentName:"tr",align:null}),(0,A.kt)("td",{parentName:"tr",align:null},"copy number variation")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"deletion"),(0,A.kt)("td",{parentName:"tr",align:null},"DEL, CN=0"),(0,A.kt)("td",{parentName:"tr",align:null},"deletion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"duplication"),(0,A.kt)("td",{parentName:"tr",align:null},"DUP"),(0,A.kt)("td",{parentName:"tr",align:null},"duplication")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS"),(0,A.kt)("td",{parentName:"tr",align:null},"insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"inversion"),(0,A.kt)("td",{parentName:"tr",align:null},"INV"),(0,A.kt)("td",{parentName:"tr",align:null},"inversion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME"),(0,A.kt)("td",{parentName:"tr",align:null},"mobile element insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:ALU"),(0,A.kt)("td",{parentName:"tr",align:null},"alu 
insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:LINE1"),(0,A.kt)("td",{parentName:"tr",align:null},"line1 insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:SVA"),(0,A.kt)("td",{parentName:"tr",align:null},"sva insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"structural alteration"),(0,A.kt)("td",{parentName:"tr",align:null}),(0,A.kt)("td",{parentName:"tr",align:null},"sequence alteration")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"complex_structural_alteration"),(0,A.kt)("td",{parentName:"tr",align:null},"CPX"),(0,A.kt)("td",{parentName:"tr",align:null})))))}R.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/85047af6.4b36082b.js b/assets/js/85047af6.4b36082b.js deleted file mode 100644 index a5002c66..00000000 --- a/assets/js/85047af6.4b36082b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7860],{3905:(_,E,t)=>{t.d(E,{Zo:()=>N,kt:()=>o});var e=t(67294);function A(_,E,t){return E in _?Object.defineProperty(_,E,{value:t,enumerable:!0,configurable:!0,writable:!0}):_[E]=t,_}function n(_,E){var t=Object.keys(_);if(Object.getOwnPropertySymbols){var e=Object.getOwnPropertySymbols(_);E&&(e=e.filter((function(E){return Object.getOwnPropertyDescriptor(_,E).enumerable}))),t.push.apply(t,e)}return t}function a(_){for(var E=1;E=0||(A[t]=_[t]);return A}(_,E);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(_);for(e=0;e=0||Object.prototype.propertyIsEnumerable.call(_,t)&&(A[t]=_[t])}return A}var l=e.createContext({}),M=function(_){var E=e.useContext(l),t=E;return _&&(t="function"==typeof _?_(E):a(a({},E),_)),t},N=function(_){var 
E=M(_.components);return e.createElement(l.Provider,{value:E},_.children)},R="mdxType",i={inlineCode:"code",wrapper:function(_){var E=_.children;return e.createElement(e.Fragment,{},E)}},F=e.forwardRef((function(_,E){var t=_.components,A=_.mdxType,n=_.originalType,l=_.parentName,N=r(_,["components","mdxType","originalType","parentName"]),R=M(t),F=A,o=R["".concat(l,".").concat(F)]||R[F]||i[F]||n;return t?e.createElement(o,a(a({ref:E},N),{},{components:t})):e.createElement(o,a({ref:E},N))}));function o(_,E){var t=arguments,A=E&&E.mdxType;if("string"==typeof _||A){var n=t.length,a=new Array(n);a[0]=F;var r={};for(var l in E)hasOwnProperty.call(E,l)&&(r[l]=E[l]);r.originalType=_,r[R]="string"==typeof _?_:A,a[1]=r;for(var M=2;M{t.r(E),t.d(E,{contentTitle:()=>a,default:()=>R,frontMatter:()=>n,metadata:()=>r,toc:()=>l});var e=t(87462),A=(t(67294),t(3905));const n={},a=void 0,r={unversionedId:"data-sources/gnomad-structural-variants-data_description",id:"data-sources/gnomad-structural-variants-data_description",title:"gnomad-structural-variants-data_description",description:"Bed Example",source:"@site/docs/data-sources/gnomad-structural-variants-data_description.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-data_description",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-data_description",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-structural-variants-data_description.md",tags:[],version:"current",frontMatter:{}},l=[{value:"Bed Example",id:"bed-example",children:[],level:4},{value:"TSV Example",id:"tsv-example",children:[],level:4},{value:"Structural Variant Type Mapping",id:"structural-variant-type-mapping",children:[],level:4}],M={toc:l},N="wrapper";function R(_){let{components:E,...t}=_;return(0,A.kt)(N,(0,e.Z)({},M,t,{components:E,mdxType:"MDXLayout"}),(0,A.kt)("h4",{id:"bed-example"},"Bed 
Example"),(0,A.kt)("p",null,"The bed file was obtained from original source for GRCh37"),(0,A.kt)("pre",null,(0,A.kt)("code",{parentName:"pre",className:"language-scss"},"#chrom start end name svtype ALGORITHMS BOTHSIDES_SUPPORT CHR2 CPX_INTERVALS CPX_TYPE END2 ENDEVIDENCE HIGH_SR_BACKGROUND PCRPLUS_DEPLETED PESR_GT_OVERDISPERSION POS2 PROTEIN_CODING__COPY_GAIN PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC PROTEIN_CODING__INTRONIC PROTEIN_CODING__INV_SPAN PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER PROTEIN_CODING__UTR SOURCE STRANDS SVLEN SVTYPE UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN AC AF N_BI_GENOS N_HOMREF N_HET N_HOMALT FREQ_HOMREF FREQ_HET FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF MALE_N_HET MALE_N_HOMALT MALE_FREQ_HOMREF MALE_FREQ_HET MALE_FREQ_HOMALT MALE_N_HEMIREF MALE_N_HEMIALT MALE_FREQ_HEMIREF MALE_FREQ_HEMIALT PAR FEMALE_AN FEMALE_AC FEMALE_AF FEMALE_N_BI_GENOS FEMALE_N_HOMREF FEMALE_N_HET FEMALE_N_HOMALT FEMALE_FREQ_HOMREF FEMALE_FREQ_HET FEMALE_FREQ_HOMALT POPMAX_AF AFR_AN AFR_AC AFR_AF AFR_N_BI_GENOS AFR_N_HOMREF AFR_N_HET AFR_N_HOMALT AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF AFR_MALE_N_HET AFR_MALE_N_HOMALT AFR_MALE_FREQ_HOMREF AFR_MALE_FREQ_HET AFR_MALE_FREQ_HOMALT AFR_MALE_N_HEMIREF AFR_MALE_N_HEMIALT AFR_MALE_FREQ_HEMIREF AFR_MALE_FREQ_HEMIALT AFR_FEMALE_AN AFR_FEMALE_AC AFR_FEMALE_AF AFR_FEMALE_N_BI_GENOS AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT AMR_AN AMR_AC AMR_AF AMR_N_BI_GENOS AMR_N_HOMREF AMR_N_HET AMR_N_HOMALT AMR_FREQ_HOMREF AMR_FREQ_HET AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS AMR_MALE_N_HOMREF AMR_MALE_N_HET AMR_MALE_N_HOMALT AMR_MALE_FREQ_HOMREF AMR_MALE_FREQ_HET AMR_MALE_FREQ_HOMALT AMR_MALE_N_HEMIREF 
AMR_MALE_N_HEMIALT AMR_MALE_FREQ_HEMIREF AMR_MALE_FREQ_HEMIALT AMR_FEMALE_AN AMR_FEMALE_AC AMR_FEMALE_AF AMR_FEMALE_N_BI_GENOS AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT EAS_AN EAS_AC EAS_AF EAS_N_BI_GENOS EAS_N_HOMREF EAS_N_HET EAS_N_HOMALT EAS_FREQ_HOMREF EAS_FREQ_HET EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF EAS_MALE_N_HET EAS_MALE_N_HOMALT EAS_MALE_FREQ_HOMREF EAS_MALE_FREQ_HET EAS_MALE_FREQ_HOMALT EAS_MALE_N_HEMIREF EAS_MALE_N_HEMIALT EAS_MALE_FREQ_HEMIREF EAS_MALE_FREQ_HEMIALT EAS_FEMALE_AN EAS_FEMALE_AC EAS_FEMALE_AF EAS_FEMALE_N_BI_GENOS EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT EUR_AN EUR_AC EUR_AF EUR_N_BI_GENOS EUR_N_HOMREF EUR_N_HET EUR_N_HOMALT EUR_FREQ_HOMREF EUR_FREQ_HET EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF EUR_MALE_N_HET EUR_MALE_N_HOMALT EUR_MALE_FREQ_HOMREF EUR_MALE_FREQ_HET EUR_MALE_FREQ_HOMALT EUR_MALE_N_HEMIREF EUR_MALE_N_HEMIALT EUR_MALE_FREQ_HEMIREF EUR_MALE_FREQ_HEMIALT EUR_FEMALE_AN EUR_FEMALE_AC EUR_FEMALE_AF EUR_FEMALE_N_BI_GENOS EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT OTH_AN OTH_AC OTH_AF OTH_N_BI_GENOS OTH_N_HOMREF OTH_N_HET OTH_N_HOMALT OTH_FREQ_HOMREF OTH_FREQ_HET OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF OTH_MALE_N_HET OTH_MALE_N_HOMALT OTH_MALE_FREQ_HOMREF OTH_MALE_FREQ_HET OTH_MALE_FREQ_HOMALT OTH_MALE_N_HEMIREF OTH_MALE_N_HEMIALT OTH_MALE_FREQ_HEMIREF OTH_MALE_FREQ_HEMIALT OTH_FEMALE_AN OTH_FEMALE_AC OTH_FEMALE_AF OTH_FEMALE_N_BI_GENOS OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET OTH_FEMALE_N_HOMALT OTH_FEMALE_FREQ_HOMREF OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT FILTER\n1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR 
False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 0.017857100814580917 0.0UNRESOLVED \n")),(0,A.kt)("h4",{id:"tsv-example"},"TSV Example"),(0,A.kt)("p",null,"The tsv was obtained 
from lifted over dataset created by dbVar for GRCh38"),(0,A.kt)("pre",null,(0,A.kt)("code",{parentName:"pre",className:"language-scss"},"#variant_call_accession variant_call_id variant_call_type experiment_id sample_id sampleset_id assembly chrcontig outer_start start inner_start inner_stop stop outer_stop insertion_length variant_region_acc variant_region_id copy_number description validation zygosity origin phenotype hgvs_name placement_method placement_rank placements_per_assembly remap_alignment remap_best_within_cluster remap_coverage remap_diff_chr remap_failure_code allele_count allele_frequency allele_number\nnssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\n")),(0,A.kt)("h4",{id:"structural-variant-type-mapping"},"Structural Variant Type Mapping"),(0,A.kt)("p",null,"The source files represented the structural variants with keys using various naming conventions.\nIn the Illumina Connected Annotations JSON output, these keys will be mapped according to the following. 
"),(0,A.kt)("table",null,(0,A.kt)("thead",{parentName:"table"},(0,A.kt)("tr",{parentName:"thead"},(0,A.kt)("th",{parentName:"tr",align:null},"Illumina Connected Annotations JSON SV Type Key"),(0,A.kt)("th",{parentName:"tr",align:null},"GRCh37 Source SV Type Key"),(0,A.kt)("th",{parentName:"tr",align:null},"GRCh38 Source SV Type Key"))),(0,A.kt)("tbody",{parentName:"table"},(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"copy_number_variation"),(0,A.kt)("td",{parentName:"tr",align:null}),(0,A.kt)("td",{parentName:"tr",align:null},"copy number variation")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"deletion"),(0,A.kt)("td",{parentName:"tr",align:null},"DEL, CN=0"),(0,A.kt)("td",{parentName:"tr",align:null},"deletion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"duplication"),(0,A.kt)("td",{parentName:"tr",align:null},"DUP"),(0,A.kt)("td",{parentName:"tr",align:null},"duplication")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS"),(0,A.kt)("td",{parentName:"tr",align:null},"insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"inversion"),(0,A.kt)("td",{parentName:"tr",align:null},"INV"),(0,A.kt)("td",{parentName:"tr",align:null},"inversion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME"),(0,A.kt)("td",{parentName:"tr",align:null},"mobile element insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:ALU"),(0,A.kt)("td",{parentName:"tr",align:null},"alu 
insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:LINE1"),(0,A.kt)("td",{parentName:"tr",align:null},"line1 insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:SVA"),(0,A.kt)("td",{parentName:"tr",align:null},"sva insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"structural alteration"),(0,A.kt)("td",{parentName:"tr",align:null}),(0,A.kt)("td",{parentName:"tr",align:null},"sequence alteration")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"complex_structural_alteration"),(0,A.kt)("td",{parentName:"tr",align:null},"CPX"),(0,A.kt)("td",{parentName:"tr",align:null})))))}R.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/85065d51.f3ff6964.js b/assets/js/85065d51.f3ff6964.js deleted file mode 100644 index 51a6f778..00000000 --- a/assets/js/85065d51.f3ff6964.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2020],{3905:(t,e,n)=>{n.d(e,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function i(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var l=r.createContext({}),p=function(t){var e=r.useContext(l),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},s=function(t){var 
e=p(t.components);return r.createElement(l.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,o=t.originalType,l=t.parentName,s=c(t,["components","mdxType","originalType","parentName"]),d=p(n),u=a,f=d["".concat(l,".").concat(u)]||d[u]||m[u]||o;return n?r.createElement(f,i(i({ref:e},s),{},{components:n})):r.createElement(f,i({ref:e},s))}));function f(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=n.length,i=new Array(o);i[0]=u;var c={};for(var l in e)hasOwnProperty.call(e,l)&&(c[l]=e[l]);c.originalType=t,c[d]="string"==typeof t?t:a,i[1]=c;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/splice-ai-json",id:"version-3.14/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/splice-ai-json.md",tags:[],version:"3.14",frontMatter:{}},l=[],p={toc:l},s="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/85115b50.bcac4e24.js b/assets/js/85115b50.bcac4e24.js deleted file mode 100644 index 1f9b932f..00000000 --- a/assets/js/85115b50.bcac4e24.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6770,392],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>h});var o=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function a(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);t&&(o=o.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,o)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);for(o=0;o=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=o.createContext({}),p=function(e){var t=o.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=p(e.components);return o.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return o.createElement(o.Fragment,{},t)}},m=o.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,a=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=p(n),m=r,h=d["".concat(s,".").concat(m)]||d[m]||u[m]||a;return n?o.createElement(h,i(i({ref:t},c),{},{components:n})):o.createElement(h,i({ref:t},c))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var a=n.length,i=new Array(a);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,i[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>d,frontMatter:()=>a,metadata:()=>l,toc:()=>s});var o=n(87462),r=(n(67294),n(3905));const a={},i=void 0,l={unversionedId:"data-sources/phylop-json",id:"version-3.21/data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/phylop-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],p={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,o.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] 
\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}d.isMDXComponent=!0},4939:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>p});var o=n(87462),r=(n(67294),n(3905)),a=n(77860);const i={title:"PhyloP"},l=void 0,s={unversionedId:"data-sources/phylop",id:"version-3.21/data-sources/phylop",title:"PhyloP",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/phylop.mdx",sourceDirName:"data-sources",slug:"/data-sources/phylop",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/phylop",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/phylop.mdx",tags:[],version:"3.21",frontMatter:{title:"PhyloP"},sidebar:"docs",previous:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/omim"},next:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/primate-ai"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"WigFix File",id:"wigfix-file",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:p},d="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,o.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"PhyloP (phylogenetic p-values) conservation scores are obtained from the ","[PHAST 
package]"," (",(0,r.kt)("a",{parentName:"p",href:"http://compgen.bscb.cornell.edu/phast/"},"http://compgen.bscb.cornell.edu/phast/"),") for multiple alignments of vertebrate genomes to the human genome. For GRCh38, the multiple alignments are against 19 mammals and for GRCh37, it is against 45 vertebrate genomes."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. ",(0,r.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. 
(",(0,r.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,r.kt)("h2",{id:"wigfix-file"},"WigFix File"),(0,r.kt)("p",null,"The data is provided in WigFix files which is a text file that provides conservation scores for contiguous intervals in the following format:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"fixedStep chrom=chr1 start=10918 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.064\nfixedStep chrom=chr1 start=34045 step=1\n0.111\n0.100\n0.111\n0.111\n0.100\n0.111\n0.111\n0.111\n0.100\n0.111\n-1.636\n")),(0,r.kt)("p",null,"We convert them to binary files with indexes for fast query. Note that these are scores for genomic positions and are reported only for SNVs."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,"GRCh37: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/"},"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/")),(0,r.kt)("p",null,"GRCh38: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/"},"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Unlike other supplemetary datasources, phyloP scores are reported in the variants section."),(0,r.kt)(a.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/85b7ba3d.827baf70.js b/assets/js/85b7ba3d.827baf70.js deleted file mode 100644 index c59e4f67..00000000 --- a/assets/js/85b7ba3d.827baf70.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4439,3103],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=d(n),u=r,v=p["".concat(s,".").concat(u)]||p[u]||m[u]||i;return n?a.createElement(v,o(o({ref:t},c),{},{components:n})):a.createElement(v,o({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/revel-json",id:"version-3.16/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.16/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/revel-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],d={toc:s},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}p.isMDXComponent=!0},75414:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),i=n(91945);const o={title:"REVEL"},l=void 0,s={unversionedId:"data-sources/revel",id:"version-3.16/data-sources/revel",title:"REVEL",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/revel.mdx",sourceDirName:"data-sources",slug:"/data-sources/revel",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/revel",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/revel.mdx",tags:[],version:"3.16",frontMatter:{title:"REVEL"},sidebar:"version-3.16/docs",previous:{title:"Primate 
AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/primate-ai"},next:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/splice-ai"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"CSV File",id:"csv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. 
",(0,r.kt)("em",{parentName:"p"},"The American Journal of Human Genetics")," ",(0,r.kt)("strong",{parentName:"p"},"99"),", 877-885 (2016). ",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1016/j.ajhg.2016.08.016"},"https://doi.org/10.1016/j.ajhg.2016.08.016")))),(0,r.kt)("h2",{id:"csv-file"},"CSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL\n1,35142,35142,G,A,T,M,0.027\n1,35142,35142,G,C,T,R,0.035\n1,35142,35142,G,T,T,K,0.043\n1,35143,35143,T,A,T,S,0.018\n1,35143,35143,T,C,T,A,0.034\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"hg19_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"grch38_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"REVEL"))),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 
11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Since the input file contains positions for both GRCh37 and GRCh38, we split it into two ",(0,r.kt)("strong",{parentName:"p"},"TSV")," files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file."))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Conflicting Scores")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When there are multiple scores available for the same variant (i.e. 
the same position with the same alternative allele), we pick the highest score."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sites.google.com/site/revelgenomics/downloads"},"https://sites.google.com/site/revelgenomics/downloads")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/86375951.dd58658c.js b/assets/js/86375951.dd58658c.js deleted file mode 100644 index 37fc7709..00000000 --- a/assets/js/86375951.dd58658c.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5248],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var c=r.createContext({}),s=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=s(e.components);return r.createElement(c.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},g=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,c=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),d=s(n),g=a,u=d["".concat(c,".").concat(g)]||d[g]||m[g]||o;return n?r.createElement(u,l(l({ref:t},p),{},{components:n})):r.createElement(u,l({ref:t},p))}));function 
u(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=g;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[d]="string"==typeof e?e:a,l[1]=i;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>o,metadata:()=>i,toc:()=>c});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,i={unversionedId:"data-sources/fusioncatcher-json",id:"version-3.16/data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/fusioncatcher-json.md",tags:[],version:"3.16",frontMatter:{}},c=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],s={toc:c},p="wrapper";function d(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA oesophageal carcinomas"\n ]\n }\n 
]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,a.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known germline data sources")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data sources")))),(0,a.kt)("h4",{id:"genes"},"genes"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"first"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"second"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are 
paralogs for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)")))),(0,a.kt)("h4",{id:"gene"},"gene"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. 
MSH6")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/8847ab82.01a9fb68.js b/assets/js/8847ab82.01a9fb68.js deleted file mode 100644 index 0c5463ca..00000000 --- a/assets/js/8847ab82.01a9fb68.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9337,4226],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),u=d(n),m=r,v=u["".concat(s,".").concat(m)]||u[m]||p[m]||o;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in 
t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/dann-json",id:"version-3.18/data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/dann-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],d={toc:s},c="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 0.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1.0")))))}u.isMDXComponent=!0},99583:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(11030);const i={title:"DANN"},l=void 
0,s={unversionedId:"data-sources/dann",id:"version-3.18/data-sources/dann",title:"DANN",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/dann.mdx",sourceDirName:"data-sources",slug:"/data-sources/dann",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dann",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/dann.mdx",tags:[],version:"3.18",frontMatter:{title:"DANN"},sidebar:"docs",previous:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/cosmic"},next:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dbsnp"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"TSV File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"GRCh38 liftover",id:"grch38-liftover",children:[],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},u="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"DANN uses the same feature set and training data as CADD (Combined Annotation-Dependent Depletion) to train a deep neural network (DNN).\nCADD is an algorithm designed to annotate both coding and non-coding variants, and has been shown to outperform other annotation algorithms.\nDANN improves on CADD (which uses Support Vector Machines (SVMs)) by capturing non-linear relationships by using a deep neural network instead of SVMs.\nDANN achieves about a 19% relative reduction in the error rate and about a 14% relative increase in the area under the curve (AUC) metric over CADD\u2019s SVM 
methodology."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Quang, Daniel, Yifei Chen, and Xiaohui Xie. DANN: a deep learning approach for annotating the pathogenicity of genetic variants. ",(0,r.kt)("em",{parentName:"p"},"Bioinformatics")," ",(0,r.kt)("strong",{parentName:"p"},"31.5")," 761-763 (2015). ",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/bioinformatics/btu703"},"https://doi.org/10.1093/bioinformatics/btu703")))),(0,r.kt)("h2",{id:"tsv-file"},"TSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr grch37_pos ref alt DANN\n1 10001 T A 0.16461391399220135\n1 10001 T C 0.4396994049749739\n1 10001 T G 0.38108629377072734\n1 10002 A C 0.36182020272810128\n1 10002 A G 0.44413258111779291\n1 10002 A T 0.16812846819989813\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we are interested in all 
columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"grch37_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DANN"))),(0,r.kt)("h2",{id:"grch38-liftover"},"GRCh38 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh38 on DANN website. We performed a liftover from GRCh37 to GRCh38 using crossmap."),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("p",null,"None"),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://cbcl.ics.uci.edu/public_data/DANN/"},"https://cbcl.ics.uci.edu/public_data/DANN/")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/8894.58b40a92.js b/assets/js/8894.58b40a92.js new file mode 100644 index 00000000..3a722b60 --- /dev/null +++ b/assets/js/8894.58b40a92.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8894],{8894:(n,a,e)=>{e.r(a)}}]); \ No newline at end of file diff --git a/assets/js/8894.e34375d2.js b/assets/js/8894.e34375d2.js deleted file mode 100644 index c3470cfb..00000000 --- a/assets/js/8894.e34375d2.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8894],{18894:(n,a,e)=>{e.r(a)}}]); \ No newline at end of file diff --git a/assets/js/8ae16000.31c8192e.js b/assets/js/8ae16000.31c8192e.js deleted file mode 100644 index c0d0d313..00000000 --- a/assets/js/8ae16000.31c8192e.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4105],{3905:(t,n,e)=>{e.d(n,{Zo:()=>m,kt:()=>k});var a=e(67294);function l(t,n,e){return n in t?Object.defineProperty(t,n,{value:e,enumerable:!0,configurable:!0,writable:!0}):t[n]=e,t}function r(t,n){var e=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable}))),e.push.apply(e,a)}return e}function o(t){for(var n=1;n=0||(l[e]=t[e]);return l}(t,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,e)&&(l[e]=t[e])}return l}var p=a.createContext({}),u=function(t){var n=a.useContext(p),e=n;return t&&(e="function"==typeof t?t(n):o(o({},n),t)),e},m=function(t){var n=u(t.components);return a.createElement(p.Provider,{value:n},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var n=t.children;return a.createElement(a.Fragment,{},n)}},N=a.forwardRef((function(t,n){var e=t.components,l=t.mdxType,r=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(e),N=l,k=d["".concat(p,".").concat(N)]||d[N]||g[N]||r;return e?a.createElement(k,o(o({ref:n},m),{},{components:e})):a.createElement(k,o({ref:n},m))}));function k(t,n){var e=arguments,l=n&&n.mdxType;if("string"==typeof t||l){var r=e.length,o=new Array(r);o[0]=N;var i={};for(var p in n)hasOwnProperty.call(n,p)&&(i[p]=n[p]);i.originalType=t,i[d]="string"==typeof t?t:l,o[1]=i;for(var u=2;u{e.r(n),e.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>i,toc:()=>p});var a=e(87462),l=(e(67294),e(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-small-variants-json",id:"data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-small-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],u={toc:p},m="wrapper";function d(t){let{components:n,...e}=t;return(0,l.kt)(m,(0,a.Z)({},u,e,{components:n,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer 
values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/8ae16000.f70873f4.js b/assets/js/8ae16000.f70873f4.js new file mode 100644 index 00000000..69ecd0be --- /dev/null +++ b/assets/js/8ae16000.f70873f4.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4105],{3905:(t,n,e)=>{e.d(n,{Zo:()=>m,kt:()=>k});var a=e(7294);function l(t,n,e){return n in t?Object.defineProperty(t,n,{value:e,enumerable:!0,configurable:!0,writable:!0}):t[n]=e,t}function r(t,n){var e=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);n&&(a=a.filter((function(n){return 
Object.getOwnPropertyDescriptor(t,n).enumerable}))),e.push.apply(e,a)}return e}function o(t){for(var n=1;n=0||(l[e]=t[e]);return l}(t,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,e)&&(l[e]=t[e])}return l}var p=a.createContext({}),u=function(t){var n=a.useContext(p),e=n;return t&&(e="function"==typeof t?t(n):o(o({},n),t)),e},m=function(t){var n=u(t.components);return a.createElement(p.Provider,{value:n},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var n=t.children;return a.createElement(a.Fragment,{},n)}},N=a.forwardRef((function(t,n){var e=t.components,l=t.mdxType,r=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(e),N=l,k=d["".concat(p,".").concat(N)]||d[N]||g[N]||r;return e?a.createElement(k,o(o({ref:n},m),{},{components:e})):a.createElement(k,o({ref:n},m))}));function k(t,n){var e=arguments,l=n&&n.mdxType;if("string"==typeof t||l){var r=e.length,o=new Array(r);o[0]=N;var i={};for(var p in n)hasOwnProperty.call(n,p)&&(i[p]=n[p]);i.originalType=t,i[d]="string"==typeof t?t:l,o[1]=i;for(var u=2;u{e.r(n),e.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>i,toc:()=>p});var a=e(7462),l=(e(7294),e(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-small-variants-json",id:"data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-small-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],u={toc:p},m="wrapper";function 
d(t){let{components:n,...e}=t;return(0,l.kt)(m,(0,a.Z)({},u,e,{components:n,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. 
Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/8bd62654.1755fef0.js b/assets/js/8bd62654.1755fef0.js deleted file mode 100644 index bf83b185..00000000 --- a/assets/js/8bd62654.1755fef0.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7340],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>m});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=c(n),h=r,m=d["".concat(s,".").concat(h)]||d[h]||u[h]||i;return n?a.createElement(m,o(o({ref:t},p),{},{components:n})):a.createElement(m,o({ref:t},p))}));function m(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={title:"Canonical Transcripts"},o=void 0,l={unversionedId:"core-functionality/canonical-transcripts",id:"version-3.17/core-functionality/canonical-transcripts",title:"Canonical 
Transcripts",description:"Overview",source:"@site/versioned_docs/version-3.17/core-functionality/canonical-transcripts.md",sourceDirName:"core-functionality",slug:"/core-functionality/canonical-transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/canonical-transcripts",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/core-functionality/canonical-transcripts.md",tags:[],version:"3.17",frontMatter:{title:"Canonical Transcripts"},sidebar:"version-3.17/docs",previous:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/file-formats/custom-annotations"},next:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/gene-fusions"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Known Algorithms",id:"known-algorithms",children:[{value:"UCSC",id:"ucsc",children:[],level:3},{value:"Ensembl",id:"ensembl",children:[],level:3},{value:"ACMG",id:"acmg",children:[],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3}],level:2},{value:"Unified Approach",id:"unified-approach",children:[],level:2}],c={toc:s},p="wrapper";function d(e){let{components:t,...i}=e;return(0,r.kt)(p,(0,a.Z)({},c,i,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"One of the more polarizing topics within annotation is the notion of canonical transcripts. Because of alternative splicing, we often have several transcripts for each gene. In the human genome, there are an average of 3.4 transcripts per gene (Tung, 2020). 
As scientists, we seem to have a need for identifying a representative example of a gene - even if there's no biological basis for the motivation."),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(93502).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Golden Helix Blog")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"A few years ago, the guys over at Golden Helix wrote an excellent post about the pitfalls and issues surrounding the identification of canonical transcripts: ",(0,r.kt)("a",{parentName:"p",href:"https://blog.goldenhelix.com/whats-in-a-name-the-intricacies-of-identifying-variants/"},"What\u2019s in a Name: The Intricacies of Identifying Variants"),"."))),(0,r.kt)("p",null,"In Nirvana, we wanted to identify an algorithm for determining the canonical transcript and apply it consistently to all of our transcript data sources."),(0,r.kt)("h2",{id:"known-algorithms"},"Known Algorithms"),(0,r.kt)("h3",{id:"ucsc"},"UCSC"),(0,r.kt)("p",null,"UCSC publishes a list of canonical transcripts in its ",(0,r.kt)("inlineCode",{parentName:"p"},"knownCanonical")," table which is available via the ",(0,r.kt)("a",{parentName:"p",href:"https://genome.ucsc.edu/cgi-bin/hgTables"},"TableBrowser"),". 
Of the RefSeq data sources, it was the only one we could find that provided canonical transcripts:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is defined as either the longest CDS, if the gene has translated transcripts, or the longest cDNA.")),(0,r.kt)("p",null,"If you were to implement this and compare it with the knownCanonical table, you would see a lot of exceptions to the rule."),(0,r.kt)("h3",{id:"ensembl"},"Ensembl"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"http://uswest.ensembl.org/Help/Glossary"},"Ensembl glossary")," states:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is used in the gene tree analysis in Ensembl and does not necessarily reflect the most biologically relevant transcript of a gene. For human, the canonical transcript for a gene is set according to the following hierarchy:"),(0,r.kt)("ol",{parentName:"blockquote"},(0,r.kt)("li",{parentName:"ol"},"Longest CCDS translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (1), choose the longest Ensembl/Havana merged translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (2), choose the longest translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no translation, choose the longest non-protein-coding transcript."))),(0,r.kt)("h3",{id:"acmg"},"ACMG"),(0,r.kt)("p",null,"From the ACMG Guidelines for the Interpretation of Sequence Variants:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"A reference transcript for each gene should be used and provided in the report when describing coding variants. 
The transcript should represent either the longest known transcript and/or the most clinically relevant transcript.")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)("p",null,"From the ClinVar paper:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"When there are multiple transcripts for a gene, ClinVar selects one HGVS expression to construct a preferred name. By default, this selection is based on the first reference standard transcript identified by the RefSeqGene/LRG (Locus Reference Genomic) collaboration.")),(0,r.kt)("h2",{id:"unified-approach"},"Unified Approach"),(0,r.kt)("p",null,"Our approach is almost identical to the one Golden Helix discussed in their article:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"If we're looking at RefSeq, only consider NM & NR transcripts as candidates for canonical transcripts."),(0,r.kt)("li",{parentName:"ol"},"Sort the transcripts in the following order:",(0,r.kt)("ol",{parentName:"li"},(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://www.lrg-sequence.org/"},"Locus Reference Genomic (LRG)")," entries occur before non-LRG entries"),(0,r.kt)("li",{parentName:"ol"},"Descending CDS length"),(0,r.kt)("li",{parentName:"ol"},"Descending transcript length"),(0,r.kt)("li",{parentName:"ol"},"Ascending accession number"))),(0,r.kt)("li",{parentName:"ol"},"Grab the first entry")))}d.isMDXComponent=!0},93502:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/hk1-transcripts-a5b85474d3b002553687715dbd004907.png"}}]); \ No newline at end of file diff --git a/assets/js/8c9e6963.938f0460.js b/assets/js/8c9e6963.938f0460.js deleted file mode 100644 index 1c323b0d..00000000 --- a/assets/js/8c9e6963.938f0460.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1147,1100,3396,2146,9449],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>g});var a=n(67294);function l(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var p=a.createContext({}),m=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},s=function(e){var t=m(e.components);return a.createElement(p.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},N=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,p=e.parentName,s=o(e,["components","mdxType","originalType","parentName"]),u=m(n),N=l,g=u["".concat(p,".").concat(N)]||u[N]||d[N]||r;return n?a.createElement(g,i(i({ref:t},s),{},{components:n})):a.createElement(g,i({ref:t},s))}));function g(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,i=new Array(r);i[0]=N;var o={};for(var p in t)hasOwnProperty.call(t,p)&&(o[p]=t[p]);o.originalType=e,o[u]="string"==typeof e?e:l,i[1]=o;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.21/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad-lof-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 
0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}u.isMDXComponent=!0},19804:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.21/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n 
"allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}u.isMDXComponent=!0},73125:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-data_description",id:"version-3.21/data-sources/gnomad-structural-variants-data_description",title:"gnomad-structural-variants-data_description",description:"Bed Example",source:"@site/versioned_docs/version-3.21/data-sources/gnomad-structural-variants-data_description.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-data_description",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad-structural-variants-data_description",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad-structural-variants-data_description.md",tags:[],version:"3.21",frontMatter:{}},p=[{value:"Bed Example",id:"bed-example",children:[],level:4},{value:"TSV Example",id:"tsv-example",children:[],level:4},{value:"Structural Variant Type Mapping",id:"structural-variant-type-mapping",children:[],level:4}],m={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h4",{id:"bed-example"},"Bed Example"),(0,l.kt)("p",null,"The bed file was obtained from original source for 
GRCh37"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#chrom start end name svtype ALGORITHMS BOTHSIDES_SUPPORT CHR2 CPX_INTERVALS CPX_TYPE END2 ENDEVIDENCE HIGH_SR_BACKGROUND PCRPLUS_DEPLETED PESR_GT_OVERDISPERSION POS2 PROTEIN_CODING__COPY_GAIN PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC PROTEIN_CODING__INTRONIC PROTEIN_CODING__INV_SPAN PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER PROTEIN_CODING__UTR SOURCE STRANDS SVLEN SVTYPE UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN AC AF N_BI_GENOS N_HOMREF N_HET N_HOMALT FREQ_HOMREF FREQ_HET FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF MALE_N_HET MALE_N_HOMALT MALE_FREQ_HOMREF MALE_FREQ_HET MALE_FREQ_HOMALT MALE_N_HEMIREF MALE_N_HEMIALT MALE_FREQ_HEMIREF MALE_FREQ_HEMIALT PAR FEMALE_AN FEMALE_AC FEMALE_AF FEMALE_N_BI_GENOS FEMALE_N_HOMREF FEMALE_N_HET FEMALE_N_HOMALT FEMALE_FREQ_HOMREF FEMALE_FREQ_HET FEMALE_FREQ_HOMALT POPMAX_AF AFR_AN AFR_AC AFR_AF AFR_N_BI_GENOS AFR_N_HOMREF AFR_N_HET AFR_N_HOMALT AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF AFR_MALE_N_HET AFR_MALE_N_HOMALT AFR_MALE_FREQ_HOMREF AFR_MALE_FREQ_HET AFR_MALE_FREQ_HOMALT AFR_MALE_N_HEMIREF AFR_MALE_N_HEMIALT AFR_MALE_FREQ_HEMIREF AFR_MALE_FREQ_HEMIALT AFR_FEMALE_AN AFR_FEMALE_AC AFR_FEMALE_AF AFR_FEMALE_N_BI_GENOS AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT AMR_AN AMR_AC AMR_AF AMR_N_BI_GENOS AMR_N_HOMREF AMR_N_HET AMR_N_HOMALT AMR_FREQ_HOMREF AMR_FREQ_HET AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS AMR_MALE_N_HOMREF AMR_MALE_N_HET AMR_MALE_N_HOMALT AMR_MALE_FREQ_HOMREF AMR_MALE_FREQ_HET AMR_MALE_FREQ_HOMALT AMR_MALE_N_HEMIREF AMR_MALE_N_HEMIALT AMR_MALE_FREQ_HEMIREF AMR_MALE_FREQ_HEMIALT AMR_FEMALE_AN AMR_FEMALE_AC 
AMR_FEMALE_AF AMR_FEMALE_N_BI_GENOS AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT EAS_AN EAS_AC EAS_AF EAS_N_BI_GENOS EAS_N_HOMREF EAS_N_HET EAS_N_HOMALT EAS_FREQ_HOMREF EAS_FREQ_HET EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF EAS_MALE_N_HET EAS_MALE_N_HOMALT EAS_MALE_FREQ_HOMREF EAS_MALE_FREQ_HET EAS_MALE_FREQ_HOMALT EAS_MALE_N_HEMIREF EAS_MALE_N_HEMIALT EAS_MALE_FREQ_HEMIREF EAS_MALE_FREQ_HEMIALT EAS_FEMALE_AN EAS_FEMALE_AC EAS_FEMALE_AF EAS_FEMALE_N_BI_GENOS EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT EUR_AN EUR_AC EUR_AF EUR_N_BI_GENOS EUR_N_HOMREF EUR_N_HET EUR_N_HOMALT EUR_FREQ_HOMREF EUR_FREQ_HET EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF EUR_MALE_N_HET EUR_MALE_N_HOMALT EUR_MALE_FREQ_HOMREF EUR_MALE_FREQ_HET EUR_MALE_FREQ_HOMALT EUR_MALE_N_HEMIREF EUR_MALE_N_HEMIALT EUR_MALE_FREQ_HEMIREF EUR_MALE_FREQ_HEMIALT EUR_FEMALE_AN EUR_FEMALE_AC EUR_FEMALE_AF EUR_FEMALE_N_BI_GENOS EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT OTH_AN OTH_AC OTH_AF OTH_N_BI_GENOS OTH_N_HOMREF OTH_N_HET OTH_N_HOMALT OTH_FREQ_HOMREF OTH_FREQ_HET OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF OTH_MALE_N_HET OTH_MALE_N_HOMALT OTH_MALE_FREQ_HOMREF OTH_MALE_FREQ_HET OTH_MALE_FREQ_HOMALT OTH_MALE_N_HEMIREF OTH_MALE_N_HEMIALT OTH_MALE_FREQ_HEMIREF OTH_MALE_FREQ_HEMIALT OTH_FEMALE_AN OTH_FEMALE_AC OTH_FEMALE_AF OTH_FEMALE_N_BI_GENOS OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET OTH_FEMALE_N_HOMALT OTH_FEMALE_FREQ_HOMREF OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT FILTER\n1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False 
False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 0.017857100814580917 0.0UNRESOLVED \n")),(0,l.kt)("h4",{id:"tsv-example"},"TSV Example"),(0,l.kt)("p",null,"The tsv was obtained from lifted over dataset created by dbVar for 
GRCh38"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#variant_call_accession variant_call_id variant_call_type experiment_id sample_id sampleset_id assembly chrcontig outer_start start inner_start inner_stop stop outer_stop insertion_length variant_region_acc variant_region_id copy_number description validation zygosity origin phenotype hgvs_name placement_method placement_rank placements_per_assembly remap_alignment remap_best_within_cluster remap_coverage remap_diff_chr remap_failure_code allele_count allele_frequency allele_number\nnssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\n")),(0,l.kt)("h4",{id:"structural-variant-type-mapping"},"Structural Variant Type Mapping"),(0,l.kt)("p",null,"The source files represented the structural variants with keys using various naming conventions.\nIn the Nirvana JSON output, these keys will be mapped according to the following. 
"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Nirvana JSON SV Type Key"),(0,l.kt)("th",{parentName:"tr",align:null},"GRCh37 Source SV Type Key"),(0,l.kt)("th",{parentName:"tr",align:null},"GRCh38 Source SV Type Key"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"copy_number_variation"),(0,l.kt)("td",{parentName:"tr",align:null}),(0,l.kt)("td",{parentName:"tr",align:null},"copy number variation")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"deletion"),(0,l.kt)("td",{parentName:"tr",align:null},"DEL, CN=0"),(0,l.kt)("td",{parentName:"tr",align:null},"deletion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"duplication"),(0,l.kt)("td",{parentName:"tr",align:null},"DUP"),(0,l.kt)("td",{parentName:"tr",align:null},"duplication")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS"),(0,l.kt)("td",{parentName:"tr",align:null},"insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"inversion"),(0,l.kt)("td",{parentName:"tr",align:null},"INV"),(0,l.kt)("td",{parentName:"tr",align:null},"inversion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME"),(0,l.kt)("td",{parentName:"tr",align:null},"mobile element insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:ALU"),(0,l.kt)("td",{parentName:"tr",align:null},"alu 
insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:LINE1"),(0,l.kt)("td",{parentName:"tr",align:null},"line1 insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:SVA"),(0,l.kt)("td",{parentName:"tr",align:null},"sva insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"structural alteration"),(0,l.kt)("td",{parentName:"tr",align:null}),(0,l.kt)("td",{parentName:"tr",align:null},"sequence alteration")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"complex_structural_alteration"),(0,l.kt)("td",{parentName:"tr",align:null},"CPX"),(0,l.kt)("td",{parentName:"tr",align:null})))))}u.isMDXComponent=!0},292:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-json",id:"version-3.21/data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad-structural-variants-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": [\n 
{\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n "variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n "afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n }\n]\n\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"chromosome number")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"begin"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"end"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"variantType"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"structural variant 
type")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"variantId"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"gnomAD ID")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"boolean"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. 
Range: 0 - 1.0")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,l.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}u.isMDXComponent=!0},73505:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>c,frontMatter:()=>m,metadata:()=>u,toc:()=>d});var a=n(87462),l=(n(67294),n(3905)),r=n(19804),i=n(36458),o=n(292),p=n(73125);const m={title:"gnomAD"},s=void 
0,u={unversionedId:"data-sources/gnomad",id:"version-3.21/data-sources/gnomad",title:"gnomAD",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/gnomad.mdx",sourceDirName:"data-sources",slug:"/data-sources/gnomad",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad.mdx",tags:[],version:"3.21",frontMatter:{title:"gnomAD"},sidebar:"docs",previous:{title:"GME Variome",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gme"},next:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mito-heteroplasmy"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF extraction",id:"vcf-extraction",children:[],level:3},{value:"Computation",id:"computation",children:[],level:3},{value:"Merging genomes and exomes",id:"merging-genomes-and-exomes",children:[],level:3},{value:"Filters",id:"filters",children:[],level:3},{value:"VCF download instructions",id:"vcf-download-instructions",children:[],level:3},{value:"JSON output",id:"json-output",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[{value:"Source data files",id:"source-data-files",children:[],level:4}],level:3}],level:2},{value:"LoF Gene Metrics",id:"lof-gene-metrics",children:[{value:"Tab delimited file example",id:"tab-delimited-file-example",children:[],level:3},{value:"JSON key to TSV column mapping",id:"json-key-to-tsv-column-mapping",children:[],level:3},{value:"Gene symbol update",id:"gene-symbol-update",children:[],level:3},{value:"Conflict resolution",id:"conflict-resolution",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON 
output",id:"json-output-1",children:[],level:3}],level:2},{value:"Structural Variants",id:"structural-variants",children:[{value:"Source Files",id:"source-files",children:[],level:3},{value:"Download URLs",id:"download-urls",children:[{value:"GRCh37",id:"grch37",children:[],level:4},{value:"GRCh38",id:"grch38",children:[],level:4},{value:"Download URL",id:"download-url-1",children:[],level:4}],level:3},{value:"JSON output",id:"json-output-2",children:[],level:3}],level:2}],N={toc:d},g="wrapper";function c(e){let{components:t,...n}=e;return(0,l.kt)(g,(0,a.Z)({},N,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"The Genome Aggregation Database (",(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/"},"gnomAD"),") is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Koch, L., 2020. Exploring human genomic diversity with gnomAD. 
",(0,l.kt)("em",{parentName:"p"},"Nature Reviews Genetics"),", ",(0,l.kt)("strong",{parentName:"p"},"21(8)"),", pp.448-448."))),(0,l.kt)("h2",{id:"small-variants"},"Small Variants"),(0,l.kt)("h3",{id:"vcf-extraction"},"VCF extraction"),(0,l.kt)("p",null,"We currently extract the following info fields from gnomAD genome and exome VCF files:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("p",null,"We also extract the following extra fields from gnomAD exome VCF file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("h3",{id:"computation"},"Computation"),(0,l.kt)("p",null,"Using these, we compute the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Coverage"),(0,l.kt)("li",{parentName:"ul"},"Allele count, Homozygous count, allele number and allele frequencies for:"),(0,l.kt)("li",{parentName:"ul"},"Global population"),(0,l.kt)("li",{parentName:"ul"},"African/African Americans"),(0,l.kt)("li",{parentName:"ul"},"Admixed Americans"),(0,l.kt)("li",{parentName:"ul"},"Ashkenazi Jews"),(0,l.kt)("li",{parentName:"ul"},"East Asians"),(0,l.kt)("li",{parentName:"ul"},"Finnish"),(0,l.kt)("li",{parentName:"ul"},"Non-Finnish Europeans"),(0,l.kt)("li",{parentName:"ul"},"South Asian"),(0,l.kt)("li",{parentName:"ul"},"Others (population not assigned)"),(0,l.kt)("li",{parentName:"ul"},"Male"),(0,l.kt)("li",{parentName:"ul"},"Female"),(0,l.kt)("li",{parentName:"ul"},"Controls")),(0,l.kt)("div",{className:"admonition admonition-tip alert 
alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Coverage = DP / AN. Frequencies are computed using AC/AN for each population."),(0,l.kt)("li",{parentName:"ul"},"Please note that currently there is no genome sequencing data of south asian (SAS) population available in gnomAD."),(0,l.kt)("li",{parentName:"ul"},"Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.")))),(0,l.kt)("h3",{id:"merging-genomes-and-exomes"},"Merging genomes and exomes"),(0,l.kt)("p",null,"When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 
5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"For GRCh37, Nirvana currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output."),(0,l.kt)("li",{parentName:"ul"},"For GRCh38, Nirvana currently uses gnomAD version 3.0 which doesn't contain the exomes data. Therefore, only genomes data are presented in the output.")))),(0,l.kt)("h3",{id:"filters"},"Filters"),(0,l.kt)("p",null,"The following strategy will be used when there's a conflict in filter status:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"center"}),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes PASS")),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes Filtered")))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes PASS")),(0,l.kt)("td",{parentName:"tr",align:"center"},"PASS"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use exome data")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes Filtered")),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use genome data"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Filtered")))),(0,l.kt)("h3",{id:"vcf-download-instructions"},"VCF download instructions"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/downloads"},"https://gnomad.broadinstitute.org/downloads")),(0,l.kt)("h3",{id:"json-output"},"JSON 
output"),(0,l.kt)(r.default,{mdxType:"JSONV"}),(0,l.kt)("h3",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,l.kt)("p",null,"The gnomAD ",(0,l.kt)("inlineCode",{parentName:"p"},".nsa")," for Nirvana can be built using the ",(0,l.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,l.kt)("inlineCode",{parentName:"p"},"gnomad")," subcommand. We will describe building gnomAD version 3.1 here."),(0,l.kt)("h4",{id:"source-data-files"},"Source data files"),(0,l.kt)("p",null,"Input VCF files (one per chromosome) and a ",(0,l.kt)("inlineCode",{parentName:"p"},".version")," file are required in a folder to build the ",(0,l.kt)("inlineCode",{parentName:"p"},".nsa")," file. For example, my directory contains:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr10.vcf.bgz chr22.vcf.bgz\nchr11.vcf.bgz chr2.vcf.bgz\nchr12.vcf.bgz chr3.vcf.bgz\nchr13.vcf.bgz chr4.vcf.bgz\nchr14.vcf.bgz chr5.vcf.bgz\nchr15.vcf.bgz chr6.vcf.bgz\nchr16.vcf.bgz chr7.vcf.bgz\nchr17.vcf.bgz chr8.vcf.bgz\nchr18.vcf.bgz chr9.vcf.bgz\nchr19.vcf.bgz chrM.vcf.bgz\nchr1.vcf.bgz chrX.vcf.bgz\nchr20.vcf.bgz chrY.vcf.bgz\nchr21.vcf.bgz gnomad.r3.1.version\n")),(0,l.kt)("p",null,"The version file is a text file with the following content."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=gnomAD\nVERSION=3.1\nDATE=2020-10-29\nDESCRIPTION=Allele frequencies from Genome Aggregation Database (gnomAD)\n")),(0,l.kt)("p",null,"The help menu for the utility is as follows:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"SAUtils.dll gnomad\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll gnomad [options]\nReads provided supplementary data files and populates 
tsv files\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --genome, -g input directory containing VCF (and .version)\n files with genomic frequencies\n --exome, -e input directory containing VCF (and .version)\n files with exomic frequencies\n --temp, -t output temp directory for intermediate (per chrom)\n NSA files\n --out, -o output directory for NSA file\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,l.kt)("p",null,"Here is a sample execution:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet ~/Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll Gnomad \\\\\n--ref ~/References/7/Homo_sapiens.GRCh38.Nirvana.dat --genome genomes/ \\\\\n--out ~/SupplementaryDatabase/63/GRCh38 --temp ~/ExternalDataSources/gnomAD/3.1/GRCh38/temp\n")),(0,l.kt)("h2",{id:"lof-gene-metrics"},"LoF Gene Metrics"),(0,l.kt)("h3",{id:"tab-delimited-file-example"},"Tab delimited file example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_zmis_z lof_z oe_lof_upper_rank oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfep_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof exac_exp_lof exac_oe_lof brain_expression chromosome start_positionend_position\nMED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 
1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643\n")),(0,l.kt)("h3",{id:"json-key-to-tsv-column-mapping"},"JSON key to TSV column mapping"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"JSON key"),(0,l.kt)("th",{parentName:"tr",align:null},"TSV column"),(0,l.kt)("th",{parentName:"tr",align:null},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"pLI"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 
0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"syn_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"mis_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"oe_lof_upper"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))),(0,l.kt)("h3",{id:"gene-symbol-update"},"Gene symbol update"),(0,l.kt)("p",null,"The input file provides Ensembl gene ids for each entry. We observed that they were unique while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene Ids are more stable, and Nirvana transcript cache data contains Ensembl gene ids, we use these ids to extract the gene symbols from the transcript cache. For example, if ENSG0001 has gene symbol GENE1 in the input but Nirvana cache say ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry."),(0,l.kt)("h3",{id:"conflict-resolution"},"Conflict resolution"),(0,l.kt)("p",null,"gnomAD uses Ensembl GeneID as unique identifiers in the ",(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"source file")," but Nirvana uses HGNC gene symbols. 
Multiple Ensembl GeneIDs can map to the same HGNC symbol and therefore may result is conflict."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"MDGA2 ENST00000426342 306 4.0043e+02 7.6419e-01 2.1096e-05 4724 78 1.6525e+02 4.7202e-01 1923 125 1.3737e+02 9.0993e-01 7.1973e-06 1413 4 2.0926e-06 453 3.8316e+01 9.9922e-01 8.6490e-12 7.8128e-04 1.0440e-01 7.8600e-01 1.0560e+00 6.9500e-01 8.4000e-01 5.0000e-02 2.3900e-01 8.2988e-01 1.6769e+00 5.1372e+00 1529 0 0 7 2.8103e-05 4.0317e-06 124784 7 0 124791 2.8047e-05 9.8167e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5391e-05 1.6672e-04 3.2680e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5308e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000139915 2 2181 13 protein_coding 835332 9.9322e-01 3 2.7833e+01 1.0779e-01 NA 14 47308826 48144157\nMDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 47311134 48143999\n")),(0,l.kt)("p",null,'In such cases, Nirvana chooses the entry with the smallest "LOEUF" value. 
The reason for choosing this value can be highlighted by the following table:'),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"right"},"LOEUF decile"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Haplo-insufficient"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Dominant"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Recessive"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Olfactory Genes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"0-10%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"104"),(0,l.kt)("td",{parentName:"tr",align:"right"},"140"),(0,l.kt)("td",{parentName:"tr",align:"right"},"36"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"10-20%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"47"),(0,l.kt)("td",{parentName:"tr",align:"right"},"128"),(0,l.kt)("td",{parentName:"tr",align:"right"},"72"),(0,l.kt)("td",{parentName:"tr",align:"right"},"1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"20-30%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"17"),(0,l.kt)("td",{parentName:"tr",align:"right"},"86"),(0,l.kt)("td",{parentName:"tr",align:"right"},"112"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"30-40%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"8"),(0,l.kt)("td",{parentName:"tr",align:"right"},"80"),(0,l.kt)("td",{parentName:"tr",align:"right"},"173"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"40-50%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"7"),(0,l.kt)("td",{parentName:"tr",align:"right"},"65"),(0,l.kt)("td",{parentName:"tr",align:"right"},"206"),(0,l.kt
)("td",{parentName:"tr",align:"right"},"8")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"50-60%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4"),(0,l.kt)("td",{parentName:"tr",align:"right"},"54"),(0,l.kt)("td",{parentName:"tr",align:"right"},"207"),(0,l.kt)("td",{parentName:"tr",align:"right"},"6")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"60-70%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"46"),(0,l.kt)("td",{parentName:"tr",align:"right"},"154"),(0,l.kt)("td",{parentName:"tr",align:"right"},"18")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"70-80%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"2"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49"),(0,l.kt)("td",{parentName:"tr",align:"right"},"120"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"80-90%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"34"),(0,l.kt)("td",{parentName:"tr",align:"right"},"58"),(0,l.kt)("td",{parentName:"tr",align:"right"},"96")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"90-100%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"26"),(0,l.kt)("td",{parentName:"tr",align:"right"},"40"),(0,l.kt)("td",{parentName:"tr",align:"right"},"174")))),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 
5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Table source: ",(0,l.kt)("a",{parentName:"li",href:"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf"},"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf")),(0,l.kt)("li",{parentName:"ul"},"This table indicates that lower LOEUF scores have more deleterious effect on genes."),(0,l.kt)("li",{parentName:"ul"},"Only 15 out of 19685 genes have conflicting entries.")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"List of genes with conflicting entries")),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'MDGA2:\n {"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}\n {"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}\nCRYBG3:\n {"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}\n {"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}\nCHTF8:\n {"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}\n {"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}\nSEPT1:\n {"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}\n {"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}\nARL14EPL:\n {"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}\n {"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}\nUGT2A1:\n {"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}\n 
{"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}\nLTB4R2:\n {"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}\n {"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}\nCDRT1:\n {"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}\n {"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}\nMUC3A:\n {"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}\n {"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}\nCOG8:\n {"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}\n {"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}\nAC006486.1:\n {"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}\n {"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}\nAL645922.1:\n {"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}\n {"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}\nNBPF20:\n {"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}\n {"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}\nPRAMEF11:\n {"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}\n {"synZ":-3.33e0,"misZ":-2.59e0}\nFAM231D:\n {"synZ":-1.98e0,"misZ":-1.44e0}\n {"synZ":1.07e0,"misZ":3.13e-1}\n')),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Conflict resolution")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Pick the entry with the lowest LOEUF score"),(0,l.kt)("li",{parentName:"ul"},"If the same, pick the lowest pLI"),(0,l.kt)("li",{parentName:"ul"},"Otherwise pick 
the entry with the max absolute value of synZ + misZ")),(0,l.kt)("h3",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz")),(0,l.kt)("h3",{id:"json-output-1"},"JSON output"),(0,l.kt)(i.default,{mdxType:"JSONG"}),(0,l.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Collins, R.L., Brand, H., Karczewski, K.J. et al. 2020. A structural variation reference for medical and population genetics. ",(0,l.kt)("em",{parentName:"p"},"Nature")," ",(0,l.kt)("strong",{parentName:"p"},"581"),", pp.444\u2013451. 
",(0,l.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/s41586-020-2287-8"},"https://doi.org/10.1038/s41586-020-2287-8")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Note"),"\nThe gnomAD structural variant annotations are in a preview stage at the moment.\nCurrently, the annotations do not include translocation breakends.\nFuture updates will include a better way of annotating the structural variants."),(0,l.kt)("h3",{id:"source-files"},"Source Files"),(0,l.kt)(p.default,{mdxType:"SVDATADESCRIPTION"}),(0,l.kt)("h3",{id:"download-urls"},"Download URLs"),(0,l.kt)("h4",{id:"grch37"},"GRCh37"),(0,l.kt)("p",null,"The GRCh37 file was downloaded from the original source. Following table gives some essential data metrics:"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.bed.gz"},"https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.bed.gz")),(0,l.kt)("h4",{id:"grch38"},"GRCh38"),(0,l.kt)("p",null,"Note: The data was unavailable from gnomAD 2.1 original source, however the lifted over structural variant dataset was created by dbVar and was obtained from them ",(0,l.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/sites/dbvarapp/studies/nstd166/"},"https://www.ncbi.nlm.nih.gov/sites/dbvarapp/studies/nstd166/"),"."),(0,l.kt)("h4",{id:"download-url-1"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/tsv/nstd166.GRCh38.variant_call.tsv.gz"},"https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/tsv/nstd166.GRCh38.variant_call.tsv.gz")),(0,l.kt)("h3",{id:"json-output-2"},"JSON output"),(0,l.kt)(o.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/8d7894b4.c71e86b9.js b/assets/js/8d7894b4.c71e86b9.js deleted file mode 100644 index 293a971c..00000000 --- 
a/assets/js/8d7894b4.c71e86b9.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7117,6403],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>D});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=s(n),u=r,D=d["".concat(p,".").concat(u)]||d[u]||m[u]||i;return n?a.createElement(D,o(o({ref:t},c),{},{components:n})):a.createElement(D,o({ref:t},c))}));function D(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in t)hasOwnProperty.call(t,p)&&(l[p]=t[p]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/splice-ai-json",id:"version-3.21/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/splice-ai-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],s={toc:p},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")))))}d.isMDXComponent=!0},99540:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>p,toc:()=>s});var a=n(87462),r=(n(67294),n(3905)),i=n(54887);const o={title:"Splice AI"},l=void 0,p={unversionedId:"data-sources/splice-ai",id:"version-3.21/data-sources/splice-ai",title:"Splice AI",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/splice-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/splice-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/splice-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/splice-ai.mdx",tags:[],version:"3.21",frontMatter:{title:"Splice AI"},sidebar:"docs",previous:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/revel"},next:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/topmed"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Filtering",id:"filtering",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:s},d="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence."),(0,r.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. ",(0,r.kt)("em",{parentName:"p"},"Cell"),", ",(0,r.kt)("strong",{parentName:"p"},"176")," (3) (2019), pp. 535-548 e24"))),(0,r.kt)("h2",{id:"vcf-file"},"VCF File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##fileformat=VCFv4.0\n##assembly=GRCh37/hg19\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n#CHROM POS ID REF ALT QUAL FILTER INFO\n10 92946 . C T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35\n10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1\n10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21\n10 92947 . A C . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34\n10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34\n10 92947 . A G . . 
SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32\n')),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the VCF file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AG")," - \u0394 score (acceptor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AL")," - \u0394 score (acceptor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DG")," - \u0394 score (donor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DL")," - \u0394 score (donor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AG")," - \u0394 position (acceptor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AL")," - \u0394 position (acceptor loss) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DG")," - \u0394 position (donor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DL")," - \u0394 position (donor loss) relative to the variant position")),(0,r.kt)("p",null,"The Splice AI team suggests the following interpretation for the scores:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Range"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Confidence"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Pathogenicity"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0 \u2264 x < 0.1"),(0,r.kt)("td",{parentName:"tr",align:"left"},"low"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely 
benign")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0.1 \u2264 x \u2264 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"medium"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely pathogenic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"x > 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"high"),(0,r.kt)("td",{parentName:"tr",align:"left"},"pathogenic")))),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"filtering"},"Filtering"),(0,r.kt)("p",null,"Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value. I.e. observing low splice scores in intergenic regions. Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed."),(0,r.kt)("p",null,"As a result, Nirvana filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. 
For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/5u6ThOblecrh"},"https://basespace.illumina.com/s/5u6ThOblecrh")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/8fd3b801.7ad2de43.js b/assets/js/8fd3b801.7ad2de43.js deleted file mode 100644 index 7e87ec10..00000000 --- a/assets/js/8fd3b801.7ad2de43.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7043],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>d});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var i=r.createContext({}),m=function(t){var e=r.useContext(i),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=m(t.components);return r.createElement(i.Provider,{value:e},t.children)},s="mdxType",f={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,l=t.originalType,i=t.parentName,c=p(t,["components","mdxType","originalType","parentName"]),s=m(n),u=a,d=s["".concat(i,".").concat(u)]||s[u]||f[u]||l;return 
n?r.createElement(d,o(o({ref:e},c),{},{components:n})):r.createElement(d,o({ref:e},c))}));function d(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=u;var p={};for(var i in e)hasOwnProperty.call(e,i)&&(p[i]=e[i]);p.originalType=t,p[s]="string"==typeof t?t:a,o[1]=p;for(var m=2;m{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>p,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,p={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.16/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.16",frontMatter:{}},i=[],m={toc:i},c="wrapper";function s(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n 
"sasAc":170\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. 
Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. 
Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9080841a.20170b7b.js b/assets/js/9080841a.20170b7b.js deleted file mode 100644 index 90efee2a..00000000 --- a/assets/js/9080841a.20170b7b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[198,9653],{3905:(e,n,t)=>{t.d(n,{Zo:()=>d,kt:()=>h});var a=t(67294);function r(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},d=function(e){var n=c(e.components);return a.createElement(l.Provider,{value:n},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var t=e.components,r=e.mdxType,o=e.originalType,l=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),u=c(t),m=r,h=u["".concat(l,".").concat(m)]||u[m]||p[m]||o;return t?a.createElement(h,i(i({ref:n},d),{},{components:t})):a.createElement(h,i({ref:n},d))}));function h(e,n){var t=arguments,r=n&&n.mdxType;if("string"==typeof e||r){var o=t.length,i=new Array(o);i[0]=m;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[u]="string"==typeof e?e:r,i[1]=s;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>s,toc:()=>l});var 
a=t(87462),r=(t(67294),t(3905));const o={},i=void 0,s={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.18/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.18",frontMatter:{}},l=[],c={toc:l},d="wrapper";function u(e){let{components:n,...t}=e;return(0,r.kt)(d,(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} \n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,r.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. 
Range: 0.01 - 1.00")))))}u.isMDXComponent=!0},67632:(e,n,t)=>{t.r(n),t.d(n,{contentTitle:()=>s,default:()=>p,frontMatter:()=>i,metadata:()=>l,toc:()=>c});var a=t(87462),r=(t(67294),t(3905)),o=t(70163);const i={title:"Amino Acid Conservation"},s=void 0,l={unversionedId:"data-sources/amino-acid-conservation",id:"version-3.18/data-sources/amino-acid-conservation",title:"Amino Acid Conservation",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/amino-acid-conservation.mdx",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/amino-acid-conservation",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/amino-acid-conservation.mdx",tags:[],version:"3.18",frontMatter:{title:"Amino Acid Conservation"},sidebar:"docs",previous:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/1000Genomes"},next:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"FASTA File",id:"fasta-file",children:[],level:2},{value:"Parsing FASTA",id:"parsing-fasta",children:[],level:2},{value:"Assigning scores to Nirvana transcripts",id:"assigning-scores-to-nirvana-transcripts",children:[{value:"GRCh37",id:"grch37",children:[],level:3},{value:"GRCh38",id:"grch38",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:c},u="wrapper";function p(e){let{components:n,...t}=e;return(0,r.kt)(u,(0,a.Z)({},d,t,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Amino acid conservation scores are obtained from multiple alignments of vertebrate exomes to the human ones. 
The score indicate the frequency with which a particular AA is observed in Humans."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. ",(0,r.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. 
(",(0,r.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,r.kt)("h2",{id:"fasta-file"},"FASTA File"),(0,r.kt)("p",null,"The exon alignments are provided in FASTA files as follows:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},">ENST00000641515.2_hg38_1_2 3 0 0 chr1:65565-65573+\nMKK\n>ENST00000641515.2_panTro4_1_2 3 0 0 chrUn_GL393541:146907-146915+\nMKK\n>ENST00000641515.2_gorGor3_1_2 3 0 0\n---\n>ENST00000641515.2_ponAbe2_1_2 3 0 0 chr15:99141417-99141425-\nMKK\n>ENST00000641515.2_hg38_2_2 324 0 0 chr1:69037-70008+\nVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ\n>ENST00000641515.2_panTro4_2_2 324 0 0 chrUn_GL393541:151333-152303+\n")),(0,r.kt)("h2",{id:"parsing-fasta"},"Parsing FASTA"),(0,r.kt)("p",null,"For each Ensembl transcript, we will need to aggregate all the exons together for each of the 100 species. From there, we should get a full alignment that can be used to determine conservation. 
For example, for ENST00000641515.2 we have:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"Human (hg38) MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nChimp MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFL-MLFFVFYGGIVFGNLLIVRIVVSDSHLHSPMYFLLANLSLIDLSLCSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGorilla ----------------------------------------------------------------------------------------------------------------------\nOrangutan MKKVTAEAISWNESTSKTNNSVVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVIIVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGibbon ----------------------------------------------------------------------------------------------------------------------\nRhesus MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVVDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\nMacaque MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVIDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\n")),(0,r.kt)("p",null,"If we look at position 6, we see that humans have an Alanine (A) residue. This residue is shared by Chimp and Orangutan. However, Rhesus and Macaque have a Glutamic acid (E) residue at that position. Moreover, Gorilla and Gibbon don't even have data for that transcript.\nFor position 6, we would say that we have 43% conservation (3/7) since three organisms share the same residue as humans."),(0,r.kt)("h2",{id:"assigning-scores-to-nirvana-transcripts"},"Assigning scores to Nirvana transcripts"),(0,r.kt)("p",null,"The source FASTA file comes with Ensembl/UCSC transcript ids of the transcripts used for alignments. The Nirvana cache has RefSeq and Ensembl transcripts and our first attempt was to map the given Ensembl/UCSC ids to their equivalent RefSeq/Ensembl ids. This attempt was unsuccessful since UCSC Table Browser provided mapping without version numbers. 
So we proceeded as follows:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Take proteins which have a unique mapping (and hence one set of conservation scores). For ones that mapped to both ChrX and ChrY, we accepted the one from ChrX."),(0,r.kt)("li",{parentName:"ul"},"A Nirvana transcript having an exact peptide sequence match with a uniquely aligned protein is assigned the corresponding conservation scores.")),(0,r.kt)("p",null,"Unfortunately this left us with a very small number of transcripts having conservation scores."),(0,r.kt)("h3",{id:"grch37"},"GRCh37"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Source FASTA contained 41957 protein alignments."),(0,r.kt)("li",{parentName:"ul"},"38165 proteins had unique scores."),(0,r.kt)("li",{parentName:"ul"},"88 aligned proteins existed in Nirvana cache."),(0,r.kt)("li",{parentName:"ul"},"118 transcripts had conservation scores.")),(0,r.kt)("h3",{id:"grch38"},"GRCh38"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Source FASTA contained 110024 protein alignments."),(0,r.kt)("li",{parentName:"ul"},"88961 proteins had unique scores."),(0,r.kt)("li",{parentName:"ul"},"11688 aligned proteins existed in Nirvana cache."),(0,r.kt)("li",{parentName:"ul"},"12098 transcripts had conservation scores.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,"GRCh37: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,r.kt)("p",null,"GRCh38: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Conservation scores are reported in the transcript section. 
One score is reported for each alt allele"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/915fca76.23e392e7.js b/assets/js/915fca76.23e392e7.js deleted file mode 100644 index 3e7e9af7..00000000 --- a/assets/js/915fca76.23e392e7.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9639,7942],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>h});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),m=d(n),u=r,h=m["".concat(s,".").concat(u)]||m[u]||c[u]||i;return n?a.createElement(h,o(o({ref:t},p),{},{components:n})):a.createElement(h,o({ref:t},p))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[m]="string"==typeof e?e:r,o[1]=l;for(var 
d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/primate-ai-json",id:"data-sources/primate-ai-json",title:"primate-ai-json",description:"GRCh38",source:"@site/docs/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/primate-ai-json.md",tags:[],version:"current",frontMatter:{}},s=[{value:"GRCh38",id:"grch38",children:[],level:4},{value:"GRCh37",id:"grch37",children:[],level:4}],d={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h4",{id:"grch38"},"GRCh38"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI-3D": [\n {\n "aminoAcidPosition": 2,\n "refAminoAcid": "V",\n "altAminoAcid": "M",\n "score": 0.616944,\n "scorePercentile": 0.52,\n "ensemblTranscriptId": "ENST00000335137.4",\n "refSeqTranscriptId": "NM_001005484.1"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidPosition"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Amino Acid Position (1-based)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAminoAcid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Reference Amino 
Acid")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAminoAcid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Alternate Amino Acid")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ensemblTranscriptId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Transcript ID (Ensembl)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refSeqTranscriptId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Transcript ID (RefSeq)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))),(0,r.kt)("h4",{id:"grch37"},"GRCh37"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI": [\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC Gene 
Symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}m.isMDXComponent=!0},93556:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),i=n(20737);const o={title:"Primate AI"},l=void 0,s={unversionedId:"data-sources/primate-ai",id:"data-sources/primate-ai",title:"Primate AI",description:"Overview",source:"@site/docs/data-sources/primate-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/primate-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/primate-ai.mdx",tags:[],version:"current",frontMatter:{title:"Primate AI"},sidebar:"docs",previous:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop"},next:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Primate AI 3D: GRCh38",id:"primate-ai-3d-grch38",children:[{value:"Parsing",id:"parsing",children:[{value:"CSV File",id:"csv-file",children:[],level:4},{value:"Parsing Command",id:"parsing-command",children:[],level:4}],level:3},{value:"Known Issues",id:"known-issues",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3}],level:2},{value:"Primate AI: GRCh37",id:"primate-ai-grch37",children:[{value:"Parsing",id:"parsing-1",children:[{value:"TSV File",id:"tsv-file",children:[],level:4}],level:3},{value:"Pre-processing",id:"pre-processing",children:[{value:"Converting UCSC IDs",id:"converting-ucsc-ids",children:[],level:4},{value:"Running the Pre-Processor",id:"running-the-pre-processor",children:[],level:4}],level:3},{value:"Known 
Issues",id:"known-issues-1",children:[],level:3},{value:"Download URL",id:"download-url-1",children:[],level:3}],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],p={toc:d},m="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations."),(0,r.kt)("p",null,"The newer version, PrimateAI-3D, uses a 3D convolutional neural network, to predict protein variant pathogenicity using structural information.\nThe model's innovative use of primate sequencing and structural data offers promising insights into variant interpretation and disease gene identification.\nThe predictive score range between 0 and 1, with 0 being benign and 1 being most pathogenic."),(0,r.kt)("p",null,"For more details, refer to these publications:"),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("ol",{parentName:"div"},(0,r.kt)("li",{parentName:"ol"},"Hong Gao et al. ,The landscape of tolerated genetic variation in humans and primates. ",(0,r.kt)("em",{parentName:"li"},"Science")," ",(0,r.kt)("strong",{parentName:"li"},"380"),", eabn8153 (2023). 
",(0,r.kt)("a",{parentName:"li",href:"https://doi.org/10.1126/science.abn8197"},"https://doi.org/10.1126/science.abn8197")),(0,r.kt)("li",{parentName:"ol"},"Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. ",(0,r.kt)("em",{parentName:"li"},"Nat Genet")," ",(0,r.kt)("strong",{parentName:"li"},"50"),", 1161\u20131170 (2018). ",(0,r.kt)("a",{parentName:"li",href:"https://doi.org/10.1038/s41588-018-0167-z"},"https://doi.org/10.1038/s41588-018-0167-z"))))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Professional data source")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"This is a Professional data source and is not available freely. Please contact ",(0,r.kt)("a",{parentName:"p",href:"mailto:annotation_support@illumina.com"},"annotation_support@illumina.com")," if you would like to obtain it."))),(0,r.kt)("p",null,"Primate AI is available in two versions based on assembly:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"Primate AI 3D: Only available for GRCh38"),(0,r.kt)("li",{parentName:"ol"},"Primate AI: Only available for GRCh37")),(0,r.kt)("p",null,"Both have different file structures, and information. 
Therefore, they are handled separately:"),(0,r.kt)("h2",{id:"primate-ai-3d-grch38"},"Primate AI 3D: GRCh38"),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("h4",{id:"csv-file"},"CSV File"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},",chr,pos,non_flipped_ref,non_flipped_alt,gene_name,change_position_1based,ref_aa,alt_aa,score_PAI3D,percentile_PAI3D,refseq\n0,chr1,69094,G,A,ENST00000335137.4,2,V,M,0.6169436463713646,0.5200308441794135,NM_001005484.1\n1,chr1,69094,G,C,ENST00000335137.4,2,V,L,0.5557043975591658,0.4271457250214688,NM_001005484.1\n2,chr1,69094,G,T,ENST00000335137.4,2,V,L,0.5557043975591658,0.4271457391722522,NM_001005484.1\n")),(0,r.kt)("p",null,"From the CSV file, all columns are parsed:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"gene_name")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"change_position_1based")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref_aa")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt_aa")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"score_PAI3D")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"percentile_PAI3D")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"refseq"))),(0,r.kt)("p",null,"The fields ",(0,r.kt)("inlineCode",{parentName:"p"},"gene_name")," and ",(0,r.kt)("inlineCode",{parentName:"p"},"refseq")," define the Ensembl and RefSeq transcript IDs respectively.\nThese transcripts are passed as-is and some of them might be unrecognized/deprecated by 
RefSeq/Ensembl."),(0,r.kt)("h4",{id:"parsing-command"},"Parsing Command"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-shell"},'dotnet SAUtils.dll \\\nPrimateAi \\\n--r "${References}/Homo_sapiens.GRCh38.Nirvana.dat" \\\n--i "${ExternalDataSources}/PrimateAI/3D/PAI3D_wholeProteome_23_04_11.percentiles.pkg.refseq.csv.gz" \\\n--o "${SaUtilsOutput]"\n')),(0,r.kt)("h3",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Some transcript IDs defined in the data file are obsolete, retired, or updated.\nThey are not removed or modified by Illumina Connected Annotations, and are passed as-is from the PrimateAI-3D data source."),(0,r.kt)("h4",{parentName:"div",id:"example"},"Example:"),(0,r.kt)("p",{parentName:"div"},(0,r.kt)("strong",{parentName:"p"},"ENST00000643905.1")," transcript is retired according to ",(0,r.kt)("a",{parentName:"p",href:"https://useast.ensembl.org/Homo_sapiens/Transcript/Idhistory?db=core;t=ENST00000643905"},"Ensembl")),(0,r.kt)("p",{parentName:"div"},(0,r.kt)("strong",{parentName:"p"},"NM_182838.2")," transcript is removed because it is a pseudo-gene according to 
",(0,r.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/nuccore/NM_182838.3"},"RefSeq")))),(0,r.kt)("h3",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://primad.basespace.illumina.com/"},"https://primad.basespace.illumina.com/")),(0,r.kt)("h2",{id:"primate-ai-grch37"},"Primate AI: GRCh37"),(0,r.kt)("h3",{id:"parsing-1"},"Parsing"),(0,r.kt)("h4",{id:"tsv-file"},"TSV File"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr pos ref alt refAA altAA strand_1pos_0neg trinucleotide_context UCSC_gene ExAC_coverage primateDL_score\nchr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239\nchr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546\n")),(0,r.kt)("p",null,"From the TSV file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"primateDL_score"))),(0,r.kt)("p",null,"We also use ",(0,r.kt)("inlineCode",{parentName:"p"},"UCSC_gene")," to filter out variants that don't have matching gene models in Illumina Connected Annotations."),(0,r.kt)("h3",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h4",{id:"converting-ucsc-ids"},"Converting UCSC IDs"),(0,r.kt)("p",null,"Primate AI only provides UCSC IDs. 
As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs."),(0,r.kt)("p",null,"The following queries are used to download the conversions from UCSC:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},'mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv\n\nmysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \\\n hg19 > ucsc_ensembl.tsv\n')),(0,r.kt)("h4",{id:"running-the-pre-processor"},"Running the Pre-Processor"),(0,r.kt)("p",null,"The Primate AI pre-processor can be run as follows:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \\\n ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz\n")),(0,r.kt)("p",null,"During conversion, 0.5% of the UCSC Ids cannot be converted to either Entrez or Ensembl gene IDs. Once the gene IDs have been acquired, we check to see which are available in Illumina Connected Annotations."),(0,r.kt)("p",null,"The following Entrez Gene IDs were not found:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"399753\n401980\n504189\n504191\n100293534\n")),(0,r.kt)("p",null,"Here is the output from the pre-processor:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.\n- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.\n- loading UGA gene ID to gene dictionary... 103,277 genes loaded.\n- parsing Primate AI variants... 
70,121,953 variants parsed.\n\n# variants with unknown gene ID: 27,253 / 70,121,953\n# genes with unknown gene ID: 109 / 19,614\n\n# variants not in UGA: 2,036 / 70,121,953\n# genes not in UGA: 6 / 19,614\n")),(0,r.kt)("h3",{id:"known-issues-1"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"The Primate AI data set provides raw scores, but the scores are biased according to gene context. I.e. a 0.4 means something different in ",(0,r.kt)("inlineCode",{parentName:"p"},"TP53")," than it does in ",(0,r.kt)("inlineCode",{parentName:"p"},"KRAS"),"."),(0,r.kt)("p",{parentName:"div"},"As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. 
According to their research, the 25",(0,r.kt)("sup",null,"th")," percentile is a good proxy for benign variants and the 75",(0,r.kt)("sup",null,"th")," percentile is a good proxy for pathogenic variants."))),(0,r.kt)("h3",{id:"download-url-1"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/cPgCSmecvhb4"},"https://basespace.illumina.com/s/cPgCSmecvhb4")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/915fca76.79d8286d.js b/assets/js/915fca76.79d8286d.js new file mode 100644 index 00000000..2df27be9 --- /dev/null +++ b/assets/js/915fca76.79d8286d.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9639,7942],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>h});var a=n(7294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),m=d(n),u=r,h=m["".concat(s,".").concat(u)]||m[u]||c[u]||i;return n?a.createElement(h,o(o({ref:t},p),{},{components:n})):a.createElement(h,o({ref:t},p))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[m]="string"==typeof e?e:r,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(7462),r=(n(7294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/primate-ai-json",id:"data-sources/primate-ai-json",title:"primate-ai-json",description:"GRCh38",source:"@site/docs/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/primate-ai-json.md",tags:[],version:"current",frontMatter:{}},s=[{value:"GRCh38",id:"grch38",children:[],level:4},{value:"GRCh37",id:"grch37",children:[],level:4}],d={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h4",{id:"grch38"},"GRCh38"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI-3D": [\n {\n "aminoAcidPosition": 2,\n "refAminoAcid": "V",\n "altAminoAcid": "M",\n "score": 0.616944,\n "scorePercentile": 0.52,\n "ensemblTranscriptId": "ENST00000335137.4",\n "refSeqTranscriptId": "NM_001005484.1"\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidPosition"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Amino Acid Position (1-based)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAminoAcid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Reference Amino Acid")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAminoAcid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Alternate Amino Acid")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ensemblTranscriptId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Transcript ID (Ensembl)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refSeqTranscriptId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Transcript ID (RefSeq)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 
1.0")))),(0,r.kt)("h4",{id:"grch37"},"GRCh37"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI": [\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC Gene Symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}m.isMDXComponent=!0},3556:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>d});var a=n(7462),r=(n(7294),n(3905)),i=n(737);const o={title:"Primate AI"},l=void 0,s={unversionedId:"data-sources/primate-ai",id:"data-sources/primate-ai",title:"Primate AI",description:"Overview",source:"@site/docs/data-sources/primate-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/primate-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/primate-ai.mdx",tags:[],version:"current",frontMatter:{title:"Primate AI"},sidebar:"docs",previous:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop"},next:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Primate AI 3D: 
GRCh38",id:"primate-ai-3d-grch38",children:[{value:"Parsing",id:"parsing",children:[{value:"CSV File",id:"csv-file",children:[],level:4},{value:"Parsing Command",id:"parsing-command",children:[],level:4}],level:3},{value:"Known Issues",id:"known-issues",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3}],level:2},{value:"Primate AI: GRCh37",id:"primate-ai-grch37",children:[{value:"Parsing",id:"parsing-1",children:[{value:"TSV File",id:"tsv-file",children:[],level:4}],level:3},{value:"Pre-processing",id:"pre-processing",children:[{value:"Converting UCSC IDs",id:"converting-ucsc-ids",children:[],level:4},{value:"Running the Pre-Processor",id:"running-the-pre-processor",children:[],level:4}],level:3},{value:"Known Issues",id:"known-issues-1",children:[],level:3},{value:"Download URL",id:"download-url-1",children:[],level:3}],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],p={toc:d},m="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations."),(0,r.kt)("p",null,"The newer version, PrimateAI-3D, uses a 3D convolutional neural network, to predict protein variant pathogenicity using structural information.\nThe model's innovative use of primate sequencing and structural data offers promising insights into variant interpretation and disease gene identification.\nThe predictive score range between 0 and 1, with 0 being benign and 1 being most pathogenic."),(0,r.kt)("p",null,"For more details, refer to these publications:"),(0,r.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("ol",{parentName:"div"},(0,r.kt)("li",{parentName:"ol"},"Hong Gao et al. ,The landscape of tolerated genetic variation in humans and primates. ",(0,r.kt)("em",{parentName:"li"},"Science")," ",(0,r.kt)("strong",{parentName:"li"},"380"),", eabn8153 (2023). ",(0,r.kt)("a",{parentName:"li",href:"https://doi.org/10.1126/science.abn8197"},"https://doi.org/10.1126/science.abn8197")),(0,r.kt)("li",{parentName:"ol"},"Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. ",(0,r.kt)("em",{parentName:"li"},"Nat Genet")," ",(0,r.kt)("strong",{parentName:"li"},"50"),", 1161\u20131170 (2018). 
",(0,r.kt)("a",{parentName:"li",href:"https://doi.org/10.1038/s41588-018-0167-z"},"https://doi.org/10.1038/s41588-018-0167-z"))))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Professional data source")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"This is a Professional data source and is not available freely. Please contact ",(0,r.kt)("a",{parentName:"p",href:"mailto:annotation_support@illumina.com"},"annotation_support@illumina.com")," if you would like to obtain it."))),(0,r.kt)("p",null,"Primate AI is available in two versions based on assembly:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"Primate AI 3D: Only available for GRCh38"),(0,r.kt)("li",{parentName:"ol"},"Primate AI: Only available for GRCh37")),(0,r.kt)("p",null,"Both have different file structures, and information. 
Therefore, they are handled separately:"),(0,r.kt)("h2",{id:"primate-ai-3d-grch38"},"Primate AI 3D: GRCh38"),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("h4",{id:"csv-file"},"CSV File"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},",chr,pos,non_flipped_ref,non_flipped_alt,gene_name,change_position_1based,ref_aa,alt_aa,score_PAI3D,percentile_PAI3D,refseq\n0,chr1,69094,G,A,ENST00000335137.4,2,V,M,0.6169436463713646,0.5200308441794135,NM_001005484.1\n1,chr1,69094,G,C,ENST00000335137.4,2,V,L,0.5557043975591658,0.4271457250214688,NM_001005484.1\n2,chr1,69094,G,T,ENST00000335137.4,2,V,L,0.5557043975591658,0.4271457391722522,NM_001005484.1\n")),(0,r.kt)("p",null,"From the CSV file, all columns are parsed:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"gene_name")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"change_position_1based")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref_aa")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt_aa")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"score_PAI3D")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"percentile_PAI3D")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"refseq"))),(0,r.kt)("p",null,"The fields ",(0,r.kt)("inlineCode",{parentName:"p"},"gene_name")," and ",(0,r.kt)("inlineCode",{parentName:"p"},"refseq")," define the Ensembl and RefSeq transcript IDs respectively.\nThese transcripts are passed as-is and some of them might be unrecognized/deprecated by 
RefSeq/Ensembl."),(0,r.kt)("h4",{id:"parsing-command"},"Parsing Command"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-shell"},'dotnet SAUtils.dll \\\nPrimateAi \\\n--r "${References}/Homo_sapiens.GRCh38.Nirvana.dat" \\\n--i "${ExternalDataSources}/PrimateAI/3D/PAI3D_wholeProteome_23_04_11.percentiles.pkg.refseq.csv.gz" \\\n--o "${SaUtilsOutput]"\n')),(0,r.kt)("h3",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Some transcript IDs defined in the data file are obsolete, retired, or updated.\nThey are not removed or modified by Illumina Connected Annotations, and are passed as-is from the PrimateAI-3D data source."),(0,r.kt)("h4",{parentName:"div",id:"example"},"Example:"),(0,r.kt)("p",{parentName:"div"},(0,r.kt)("strong",{parentName:"p"},"ENST00000643905.1")," transcript is retired according to ",(0,r.kt)("a",{parentName:"p",href:"https://useast.ensembl.org/Homo_sapiens/Transcript/Idhistory?db=core;t=ENST00000643905"},"Ensembl")),(0,r.kt)("p",{parentName:"div"},(0,r.kt)("strong",{parentName:"p"},"NM_182838.2")," transcript is removed because it is a pseudo-gene according to 
",(0,r.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/nuccore/NM_182838.3"},"RefSeq")))),(0,r.kt)("h3",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://primad.basespace.illumina.com/"},"https://primad.basespace.illumina.com/")),(0,r.kt)("h2",{id:"primate-ai-grch37"},"Primate AI: GRCh37"),(0,r.kt)("h3",{id:"parsing-1"},"Parsing"),(0,r.kt)("h4",{id:"tsv-file"},"TSV File"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr pos ref alt refAA altAA strand_1pos_0neg trinucleotide_context UCSC_gene ExAC_coverage primateDL_score\nchr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239\nchr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546\n")),(0,r.kt)("p",null,"From the TSV file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"primateDL_score"))),(0,r.kt)("p",null,"We also use ",(0,r.kt)("inlineCode",{parentName:"p"},"UCSC_gene")," to filter out variants that don't have matching gene models in Illumina Connected Annotations."),(0,r.kt)("h3",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h4",{id:"converting-ucsc-ids"},"Converting UCSC IDs"),(0,r.kt)("p",null,"Primate AI only provides UCSC IDs. 
As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs."),(0,r.kt)("p",null,"The following queries are used to download the conversions from UCSC:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},'mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv\n\nmysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \\\n hg19 > ucsc_ensembl.tsv\n')),(0,r.kt)("h4",{id:"running-the-pre-processor"},"Running the Pre-Processor"),(0,r.kt)("p",null,"The Primate AI pre-processor can be run as follows:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \\\n ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz\n")),(0,r.kt)("p",null,"During conversion, 0.5% of the UCSC Ids cannot be converted to either Entrez or Ensembl gene IDs. Once the gene IDs have been acquired, we check to see which are available in Illumina Connected Annotations."),(0,r.kt)("p",null,"The following Entrez Gene IDs were not found:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"399753\n401980\n504189\n504191\n100293534\n")),(0,r.kt)("p",null,"Here is the output from the pre-processor:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.\n- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.\n- loading UGA gene ID to gene dictionary... 103,277 genes loaded.\n- parsing Primate AI variants... 
70,121,953 variants parsed.\n\n# variants with unknown gene ID: 27,253 / 70,121,953\n# genes with unknown gene ID: 109 / 19,614\n\n# variants not in UGA: 2,036 / 70,121,953\n# genes not in UGA: 6 / 19,614\n")),(0,r.kt)("h3",{id:"known-issues-1"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"The Primate AI data set provides raw scores, but the scores are biased according to gene context. I.e. a 0.4 means something different in ",(0,r.kt)("inlineCode",{parentName:"p"},"TP53")," than it does in ",(0,r.kt)("inlineCode",{parentName:"p"},"KRAS"),"."),(0,r.kt)("p",{parentName:"div"},"As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. 
According to their research, the 25",(0,r.kt)("sup",null,"th")," percentile is a good proxy for benign variants and the 75",(0,r.kt)("sup",null,"th")," percentile is a good proxy for pathogenic variants."))),(0,r.kt)("h3",{id:"download-url-1"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/cPgCSmecvhb4"},"https://basespace.illumina.com/s/cPgCSmecvhb4")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9287778e.73f1790f.js b/assets/js/9287778e.73f1790f.js deleted file mode 100644 index bb9e40cb..00000000 --- a/assets/js/9287778e.73f1790f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3085],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>h});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var 
n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),u=c(n),m=i,h=u["".concat(s,".").concat(m)]||u[m]||p[m]||r;return n?a.createElement(h,o(o({ref:t},d),{},{components:n})):a.createElement(h,o({ref:t},d))}));function h(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:i,o[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={title:"Getting Started"},o=void 0,l={unversionedId:"introduction/getting-started",id:"version-3.18/introduction/getting-started",title:"Getting Started",description:"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.",source:"@site/versioned_docs/version-3.18/introduction/getting-started.md",sourceDirName:"introduction",slug:"/introduction/getting-started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/getting-started",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/introduction/getting-started.md",tags:[],version:"3.18",frontMatter:{title:"Getting Started"},sidebar:"docs",previous:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/dependencies"},next:{title:"Parsing Nirvana JSON",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/parsing-json"}},s=[{value:"Quick Start",id:"quick-start",children:[],level:2},{value:"Getting Nirvana",id:"getting-nirvana",children:[{value:"Compile from Source",id:"compile-from-source",children:[],level:3},{value:"GitHub Release 
Notes",id:"github-release-notes",children:[],level:3},{value:"Docker",id:"docker",children:[],level:3}],level:2},{value:"Downloading the data files",id:"downloading-the-data-files",children:[],level:2},{value:"Download a test VCF file",id:"download-a-test-vcf-file",children:[],level:2},{value:"Running Nirvana",id:"running-nirvana",children:[],level:2}],c={toc:s},d="wrapper";function u(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"Nirvana is written in C# using ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core")," (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files."),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana currently uses .NET Core 3.1 or later. 
Please make sure that you have the most current runtime from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core downloads")," page."))),(0,i.kt)("h2",{id:"quick-start"},"Quick Start"),(0,i.kt)("p",null,"If you want to get started right away, we've created ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh"},"a script")," that downloads Nirvana, compiles it, and starts annotating a test file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh\nbash ./TestNirvana.sh\n")),(0,i.kt)("p",null,"We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X."),(0,i.kt)("h2",{id:"getting-nirvana"},"Getting Nirvana"),(0,i.kt)("h3",{id:"compile-from-source"},"Compile from Source"),(0,i.kt)("p",null,"The following will grab the latest version of Nirvana from GitHub and compile it using the .NET Core compiler:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"git clone https://github.com/Illumina/Nirvana.git\ncd Nirvana\ndotnet build -c Release\n")),(0,i.kt)("h3",{id:"github-release-notes"},"GitHub Release Notes"),(0,i.kt)("p",null,"Alternatively, you can grab the latest binaries from our ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/releases"},"GitHub Releases")," page:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p Nirvana/Data\ncd Nirvana\nunzip Nirvana-3.16.1-dotnet-3.1.0.zip\n")),(0,i.kt)("h3",{id:"docker"},"Docker"),(0,i.kt)("p",null,"You can find us on ",(0,i.kt)("a",{parentName:"p",href:"https://hub.docker.com/repository/docker/annotation/nirvana"},"Docker Hub")," under ",(0,i.kt)("inlineCode",{parentName:"p"},"annotation/nirvana"),":"),(0,i.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"We think Docker is fantastic. However, because our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Nirvana in Docker."))),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p Nirvana/Data\ncd Nirvana\ndocker pull annotation/nirvana:3.14\n")),(0,i.kt)("p",null,"For Docker, we have special instructions for running the Downloader:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \\\n /opt/nirvana/Downloader.dll --ga GRCh37 -o /scratch\n")),(0,i.kt)("p",null,"Similarly, we have special instructions for running Nirvana (Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF")," in case you need it):"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \\\n /opt/nirvana/Nirvana.dll -c /scratch/Cache/GRCh37/Both \\\n -r /scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n --sd /scratch/SupplementaryAnnotation/GRCh37 \\\n -i /scratch/HiSeq.10000.vcf.gz -o 
/scratch/HiSeq\n")),(0,i.kt)("h2",{id:"downloading-the-data-files"},"Downloading the data files"),(0,i.kt)("p",null,"To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp3.1/Downloader.dll \\\n --ga GRCh37 \\\n -o Data\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--ga")," argument specifies the genome assembly which can be ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh37"),", ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh38"),", or ",(0,i.kt)("inlineCode",{parentName:"li"},"both"),"."),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Glitches in the Matrix")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. 
If you see that a file was marked ",(0,i.kt)("inlineCode",{parentName:"p"},"truncated"),", try fixing the root cause and running the downloader again."))),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"From time to time, you can re-run the Downloader to get the latest annotation files. 
It will only download the files that changed."))),(0,i.kt)("h2",{id:"download-a-test-vcf-file"},"Download a test VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz\n")),(0,i.kt)("h2",{id:"running-nirvana"},"Running Nirvana"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp3.1/Nirvana.dll \\\n -c Data/Cache/GRCh37/Both \\\n --sd Data/SupplementaryAnnotation/GRCh37 \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i HiSeq.10000.vcf.gz \\\n -o HiSeq.10000\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache prefix"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nNirvana (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, 
Jiang, Li, and Kang 3.16.1\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.2\nSA Position Scan 00:00:00.1 55,270\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr1 00:00:00.1 00:00:01.5 6,323\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:01.3 23.9 %\nPreload 00:00:00.1 2.9 %\nAnnotation 00:00:01.5 27.2 %\n\nPeak memory usage: 1.434 GB\nTime: 00:00:05.2\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"HiSeq.10000.json.gz"),". Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.json.gz"},"the full JSON file"),"."))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/935f2afb.0c7b267f.js b/assets/js/935f2afb.0c7b267f.js new file mode 100644 index 00000000..ae7b2936 --- /dev/null +++ b/assets/js/935f2afb.0c7b267f.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[53],{1109:e=>{e.exports=JSON.parse('{"pluginId":"default","version":"current","label":"3.22 (unreleased)","banner":null,"badge":false,"className":"docs-version-current","isLast":true,"docsSidebars":{"docs":[{"type":"category","label":"Introduction","items":[{"type":"link","label":"Introduction","href":"/IlluminaConnectedAnnotationsDocumentation/","docId":"introduction/introduction"},{"type":"link","label":"Dependencies","href":"/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies","docId":"introduction/dependencies"},{"type":"link","label":"Getting 
Started","href":"/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started","docId":"introduction/getting-started"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Data Sources","items":[{"type":"link","label":"1000 Genomes","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes","docId":"data-sources/1000Genomes"},{"type":"link","label":"Amino Acid Conservation","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation","docId":"data-sources/amino-acid-conservation"},{"type":"link","label":"Cancer Hotspots","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/cancer-hotspots","docId":"data-sources/cancer-hotspots"},{"type":"link","label":"ClinGen","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen","docId":"data-sources/clingen"},{"type":"link","label":"ClinVar","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar","docId":"data-sources/clinvar"},{"type":"link","label":"COSMIC","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic","docId":"data-sources/cosmic"},{"type":"link","label":"DANN","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann","docId":"data-sources/dann"},{"type":"link","label":"dbSNP","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp","docId":"data-sources/dbsnp"},{"type":"link","label":"DECIPHER","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher","docId":"data-sources/decipher"},{"type":"link","label":"FusionCatcher","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher","docId":"data-sources/fusioncatcher"},{"type":"link","label":"GERP","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp","docId":"data-sources/gerp"},{"type":"link","label":"GME 
Variome","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme","docId":"data-sources/gme"},{"type":"link","label":"gnomAD","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad","docId":"data-sources/gnomad"},{"type":"link","label":"Mitochondrial Heteroplasmy","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-heteroplasmy","docId":"data-sources/mito-heteroplasmy"},{"type":"link","label":"MITOMAP","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap","docId":"data-sources/mitomap"},{"type":"link","label":"OMIM","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim","docId":"data-sources/omim"},{"type":"link","label":"PhyloP","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop","docId":"data-sources/phylop"},{"type":"link","label":"Primate AI","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai","docId":"data-sources/primate-ai"},{"type":"link","label":"REVEL","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel","docId":"data-sources/revel"},{"type":"link","label":"Splice AI","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai","docId":"data-sources/splice-ai"},{"type":"link","label":"TOPMed","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed","docId":"data-sources/topmed"}],"collapsible":true,"collapsed":true},{"type":"category","label":"File Formats","items":[{"type":"link","label":"Illumina Connected Annotations JSON File Format","href":"/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format","docId":"file-formats/illumina-annotator-json-file-format"},{"type":"link","label":"Custom Annotations","href":"/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations","docId":"file-formats/custom-annotations"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Core 
Functionality","items":[{"type":"link","label":"Canonical Transcripts","href":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts","docId":"core-functionality/canonical-transcripts"},{"type":"link","label":"Transcript Consequence Impact","href":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts","docId":"core-functionality/transcript-consequence-impacts"},{"type":"link","label":"Gene Fusion Detection","href":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions","docId":"core-functionality/gene-fusions"},{"type":"link","label":"Variant IDs","href":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids","docId":"core-functionality/variant-ids"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Utilities","items":[{"type":"link","label":"Jasix","href":"/IlluminaConnectedAnnotationsDocumentation/utilities/jasix","docId":"utilities/jasix"},{"type":"link","label":"SAUtils","href":"/IlluminaConnectedAnnotationsDocumentation/utilities/sautils","docId":"utilities/sautils"}],"collapsible":true,"collapsed":true}]},"docs":{"core-functionality/canonical-transcripts":{"id":"core-functionality/canonical-transcripts","title":"Canonical Transcripts","description":"Overview","sidebar":"docs"},"core-functionality/gene-fusions":{"id":"core-functionality/gene-fusions","title":"Gene Fusion Detection","description":"Overview","sidebar":"docs"},"core-functionality/transcript-consequence-impacts":{"id":"core-functionality/transcript-consequence-impacts","title":"Transcript Consequence Impact","description":"Overview","sidebar":"docs"},"core-functionality/variant-ids":{"id":"core-functionality/variant-ids","title":"Variant IDs","description":"Overview","sidebar":"docs"},"data-sources/1000Genomes":{"id":"data-sources/1000Genomes","title":"1000 
Genomes","description":"Overview","sidebar":"docs"},"data-sources/1000Genomes-snv-json":{"id":"data-sources/1000Genomes-snv-json","title":"1000Genomes-snv-json","description":"| Field | Type | Notes |"},"data-sources/1000Genomes-sv-json":{"id":"data-sources/1000Genomes-sv-json","title":"1000Genomes-sv-json","description":"| Field | Type | Notes |"},"data-sources/amino-acid-conservation":{"id":"data-sources/amino-acid-conservation","title":"Amino Acid Conservation","description":"Overview","sidebar":"docs"},"data-sources/amino-acid-conservation-json":{"id":"data-sources/amino-acid-conservation-json","title":"amino-acid-conservation-json","description":"| Field | Type | Notes |"},"data-sources/cancer-hotspots":{"id":"data-sources/cancer-hotspots","title":"Cancer Hotspots","description":"Overview","sidebar":"docs"},"data-sources/clingen":{"id":"data-sources/clingen","title":"ClinGen","description":"Overview","sidebar":"docs"},"data-sources/clingen-dosage-json":{"id":"data-sources/clingen-dosage-json","title":"clingen-dosage-json","description":"| Field | Type | Notes |"},"data-sources/clingen-gene-validity-json":{"id":"data-sources/clingen-gene-validity-json","title":"clingen-gene-validity-json","description":"| Field | Type | Notes |"},"data-sources/clingen-json":{"id":"data-sources/clingen-json","title":"clingen-json","description":"| Field | Type | Notes |"},"data-sources/clinvar":{"id":"data-sources/clinvar","title":"ClinVar","description":"Overview","sidebar":"docs"},"data-sources/clinvar-json":{"id":"data-sources/clinvar-json","title":"clinvar-json","description":"small variants:"},"data-sources/cosmic":{"id":"data-sources/cosmic","title":"COSMIC","description":"Overview","sidebar":"docs"},"data-sources/cosmic-cancer-gene-census":{"id":"data-sources/cosmic-cancer-gene-census","title":"cosmic-cancer-gene-census","description":"| Field | Type | Notes 
|"},"data-sources/cosmic-gene-fusion-json":{"id":"data-sources/cosmic-gene-fusion-json","title":"cosmic-gene-fusion-json","description":"| Field | Type | Notes |"},"data-sources/cosmic-json":{"id":"data-sources/cosmic-json","title":"cosmic-json","description":"| Field | Type | Notes |"},"data-sources/dann":{"id":"data-sources/dann","title":"DANN","description":"Overview","sidebar":"docs"},"data-sources/dann-json":{"id":"data-sources/dann-json","title":"dann-json","description":"| Field | Type | Notes |"},"data-sources/dbsnp":{"id":"data-sources/dbsnp","title":"dbSNP","description":"Overview","sidebar":"docs"},"data-sources/dbsnp-json":{"id":"data-sources/dbsnp-json","title":"dbsnp-json","description":"| Field | Type | Notes |"},"data-sources/decipher":{"id":"data-sources/decipher","title":"DECIPHER","description":"Overview","sidebar":"docs"},"data-sources/decipher-json":{"id":"data-sources/decipher-json","title":"decipher-json","description":"| Field | Type | Notes |"},"data-sources/fusioncatcher":{"id":"data-sources/fusioncatcher","title":"FusionCatcher","description":"Overview","sidebar":"docs"},"data-sources/fusioncatcher-json":{"id":"data-sources/fusioncatcher-json","title":"fusioncatcher-json","description":"| Field | Type | Notes |"},"data-sources/gerp":{"id":"data-sources/gerp","title":"GERP","description":"Overview","sidebar":"docs"},"data-sources/gerp-json":{"id":"data-sources/gerp-json","title":"gerp-json","description":"| Field | Type | Notes |"},"data-sources/gme":{"id":"data-sources/gme","title":"GME Variome","description":"Overview","sidebar":"docs"},"data-sources/gme-json":{"id":"data-sources/gme-json","title":"gme-json","description":"| Field | Type | Notes |"},"data-sources/gnomad":{"id":"data-sources/gnomad","title":"gnomAD","description":"Overview","sidebar":"docs"},"data-sources/gnomad-lof-json":{"id":"data-sources/gnomad-lof-json","title":"gnomad-lof-json","description":"| Field | Type | Notes 
|"},"data-sources/gnomad-small-variants-json":{"id":"data-sources/gnomad-small-variants-json","title":"gnomad-small-variants-json","description":"| Field | Type | Notes |"},"data-sources/gnomad-structural-variants-data_description":{"id":"data-sources/gnomad-structural-variants-data_description","title":"gnomad-structural-variants-data_description","description":"Bed Example"},"data-sources/gnomad-structural-variants-json":{"id":"data-sources/gnomad-structural-variants-json","title":"gnomad-structural-variants-json","description":"| Field | Type | Notes |"},"data-sources/mito-heteroplasmy":{"id":"data-sources/mito-heteroplasmy","title":"Mitochondrial Heteroplasmy","description":"Overview","sidebar":"docs"},"data-sources/mitomap":{"id":"data-sources/mitomap","title":"MITOMAP","description":"Overview","sidebar":"docs"},"data-sources/mitomap-small-variants-json":{"id":"data-sources/mitomap-small-variants-json","title":"mitomap-small-variants-json","description":"| Field | Type | Notes |"},"data-sources/mitomap-structural-variants-json":{"id":"data-sources/mitomap-structural-variants-json","title":"mitomap-structural-variants-json","description":"| Field | Type | Notes |"},"data-sources/omim":{"id":"data-sources/omim","title":"OMIM","description":"Overview","sidebar":"docs"},"data-sources/omim-json":{"id":"data-sources/omim-json","title":"omim-json","description":"| Field | Type | Notes |"},"data-sources/phylop":{"id":"data-sources/phylop","title":"PhyloP","description":"Overview","sidebar":"docs"},"data-sources/phylop-json":{"id":"data-sources/phylop-json","title":"phylop-json","description":"| Field | Type | Notes |"},"data-sources/primate-ai":{"id":"data-sources/primate-ai","title":"Primate 
AI","description":"Overview","sidebar":"docs"},"data-sources/primate-ai-json":{"id":"data-sources/primate-ai-json","title":"primate-ai-json","description":"GRCh38"},"data-sources/revel":{"id":"data-sources/revel","title":"REVEL","description":"Overview","sidebar":"docs"},"data-sources/revel-json":{"id":"data-sources/revel-json","title":"revel-json","description":"| Field | Type | Notes |"},"data-sources/splice-ai":{"id":"data-sources/splice-ai","title":"Splice AI","description":"Overview","sidebar":"docs"},"data-sources/splice-ai-json":{"id":"data-sources/splice-ai-json","title":"splice-ai-json","description":"| Field | Type | Notes |"},"data-sources/topmed":{"id":"data-sources/topmed","title":"TOPMed","description":"Overview","sidebar":"docs"},"data-sources/topmed-json":{"id":"data-sources/topmed-json","title":"topmed-json","description":"| Field | Type | Notes |"},"file-formats/custom-annotations":{"id":"file-formats/custom-annotations","title":"Custom Annotations","description":"Overview","sidebar":"docs"},"file-formats/illumina-annotator-json-file-format":{"id":"file-formats/illumina-annotator-json-file-format","title":"Illumina Connected Annotations JSON File Format","description":"Overview","sidebar":"docs"},"introduction/dependencies":{"id":"introduction/dependencies","title":"Dependencies","description":"All of the following dependencies have been included in this repository.","sidebar":"docs"},"introduction/getting-started":{"id":"introduction/getting-started","title":"Getting Started","description":"Illumina Connected Annotations is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.","sidebar":"docs"},"introduction/introduction":{"id":"introduction/introduction","title":"Introduction","description":"Clinical-grade variant annotation","sidebar":"docs"},"introduction/parsing-json":{"id":"introduction/parsing-json","title":"Parsing Illumina Connected Annotations JSON","description":"Parsing JSON"},"utilities/jasix":{"id":"utilities/jasix","title":"Jasix","description":"Overview","sidebar":"docs"},"utilities/sautils":{"id":"utilities/sautils","title":"SAUtils","description":"Overview","sidebar":"docs"}}}')}}]); \ No newline at end of file diff --git a/assets/js/935f2afb.2f4e99ea.js b/assets/js/935f2afb.2f4e99ea.js deleted file mode 100644 index 49d24c8c..00000000 --- a/assets/js/935f2afb.2f4e99ea.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[53],{1109:e=>{e.exports=JSON.parse('{"pluginId":"default","version":"current","label":"3.22 (unreleased)","banner":null,"badge":true,"className":"docs-version-current","isLast":true,"docsSidebars":{"docs":[{"type":"category","label":"Introduction","items":[{"type":"link","label":"Introduction","href":"/IlluminaConnectedAnnotationsDocumentation/","docId":"introduction/introduction"},{"type":"link","label":"Dependencies","href":"/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies","docId":"introduction/dependencies"},{"type":"link","label":"Getting Started","href":"/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started","docId":"introduction/getting-started"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Data Sources","items":[{"type":"link","label":"1000 Genomes","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes","docId":"data-sources/1000Genomes"},{"type":"link","label":"Amino Acid 
Conservation","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation","docId":"data-sources/amino-acid-conservation"},{"type":"link","label":"Cancer Hotspots","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/cancer-hotspots","docId":"data-sources/cancer-hotspots"},{"type":"link","label":"ClinGen","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen","docId":"data-sources/clingen"},{"type":"link","label":"ClinVar","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar","docId":"data-sources/clinvar"},{"type":"link","label":"COSMIC","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic","docId":"data-sources/cosmic"},{"type":"link","label":"DANN","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann","docId":"data-sources/dann"},{"type":"link","label":"dbSNP","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp","docId":"data-sources/dbsnp"},{"type":"link","label":"DECIPHER","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher","docId":"data-sources/decipher"},{"type":"link","label":"FusionCatcher","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher","docId":"data-sources/fusioncatcher"},{"type":"link","label":"GERP","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp","docId":"data-sources/gerp"},{"type":"link","label":"GME Variome","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme","docId":"data-sources/gme"},{"type":"link","label":"gnomAD","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad","docId":"data-sources/gnomad"},{"type":"link","label":"Mitochondrial 
Heteroplasmy","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-heteroplasmy","docId":"data-sources/mito-heteroplasmy"},{"type":"link","label":"MITOMAP","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap","docId":"data-sources/mitomap"},{"type":"link","label":"OMIM","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim","docId":"data-sources/omim"},{"type":"link","label":"PhyloP","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop","docId":"data-sources/phylop"},{"type":"link","label":"Primate AI","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai","docId":"data-sources/primate-ai"},{"type":"link","label":"REVEL","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel","docId":"data-sources/revel"},{"type":"link","label":"Splice AI","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai","docId":"data-sources/splice-ai"},{"type":"link","label":"TOPMed","href":"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed","docId":"data-sources/topmed"}],"collapsible":true,"collapsed":true},{"type":"category","label":"File Formats","items":[{"type":"link","label":"Illumina Connected Annotations JSON File Format","href":"/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format","docId":"file-formats/illumina-annotator-json-file-format"},{"type":"link","label":"Custom Annotations","href":"/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations","docId":"file-formats/custom-annotations"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Core Functionality","items":[{"type":"link","label":"Canonical Transcripts","href":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts","docId":"core-functionality/canonical-transcripts"},{"type":"link","label":"Transcript Consequence 
Impact","href":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts","docId":"core-functionality/transcript-consequence-impacts"},{"type":"link","label":"Gene Fusion Detection","href":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions","docId":"core-functionality/gene-fusions"},{"type":"link","label":"Variant IDs","href":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids","docId":"core-functionality/variant-ids"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Utilities","items":[{"type":"link","label":"Jasix","href":"/IlluminaConnectedAnnotationsDocumentation/utilities/jasix","docId":"utilities/jasix"},{"type":"link","label":"SAUtils","href":"/IlluminaConnectedAnnotationsDocumentation/utilities/sautils","docId":"utilities/sautils"}],"collapsible":true,"collapsed":true}]},"docs":{"core-functionality/canonical-transcripts":{"id":"core-functionality/canonical-transcripts","title":"Canonical Transcripts","description":"Overview","sidebar":"docs"},"core-functionality/gene-fusions":{"id":"core-functionality/gene-fusions","title":"Gene Fusion Detection","description":"Overview","sidebar":"docs"},"core-functionality/transcript-consequence-impacts":{"id":"core-functionality/transcript-consequence-impacts","title":"Transcript Consequence Impact","description":"Overview","sidebar":"docs"},"core-functionality/variant-ids":{"id":"core-functionality/variant-ids","title":"Variant IDs","description":"Overview","sidebar":"docs"},"data-sources/1000Genomes":{"id":"data-sources/1000Genomes","title":"1000 Genomes","description":"Overview","sidebar":"docs"},"data-sources/1000Genomes-snv-json":{"id":"data-sources/1000Genomes-snv-json","title":"1000Genomes-snv-json","description":"| Field | Type | Notes |"},"data-sources/1000Genomes-sv-json":{"id":"data-sources/1000Genomes-sv-json","title":"1000Genomes-sv-json","description":"| Field | Type | Notes 
|"},"data-sources/amino-acid-conservation":{"id":"data-sources/amino-acid-conservation","title":"Amino Acid Conservation","description":"Overview","sidebar":"docs"},"data-sources/amino-acid-conservation-json":{"id":"data-sources/amino-acid-conservation-json","title":"amino-acid-conservation-json","description":"| Field | Type | Notes |"},"data-sources/cancer-hotspots":{"id":"data-sources/cancer-hotspots","title":"Cancer Hotspots","description":"Overview","sidebar":"docs"},"data-sources/clingen":{"id":"data-sources/clingen","title":"ClinGen","description":"Overview","sidebar":"docs"},"data-sources/clingen-dosage-json":{"id":"data-sources/clingen-dosage-json","title":"clingen-dosage-json","description":"| Field | Type | Notes |"},"data-sources/clingen-gene-validity-json":{"id":"data-sources/clingen-gene-validity-json","title":"clingen-gene-validity-json","description":"| Field | Type | Notes |"},"data-sources/clingen-json":{"id":"data-sources/clingen-json","title":"clingen-json","description":"| Field | Type | Notes |"},"data-sources/clinvar":{"id":"data-sources/clinvar","title":"ClinVar","description":"Overview","sidebar":"docs"},"data-sources/clinvar-json":{"id":"data-sources/clinvar-json","title":"clinvar-json","description":"small variants:"},"data-sources/cosmic":{"id":"data-sources/cosmic","title":"COSMIC","description":"Overview","sidebar":"docs"},"data-sources/cosmic-cancer-gene-census":{"id":"data-sources/cosmic-cancer-gene-census","title":"cosmic-cancer-gene-census","description":"| Field | Type | Notes |"},"data-sources/cosmic-gene-fusion-json":{"id":"data-sources/cosmic-gene-fusion-json","title":"cosmic-gene-fusion-json","description":"| Field | Type | Notes |"},"data-sources/cosmic-json":{"id":"data-sources/cosmic-json","title":"cosmic-json","description":"| Field | Type | Notes 
|"},"data-sources/dann":{"id":"data-sources/dann","title":"DANN","description":"Overview","sidebar":"docs"},"data-sources/dann-json":{"id":"data-sources/dann-json","title":"dann-json","description":"| Field | Type | Notes |"},"data-sources/dbsnp":{"id":"data-sources/dbsnp","title":"dbSNP","description":"Overview","sidebar":"docs"},"data-sources/dbsnp-json":{"id":"data-sources/dbsnp-json","title":"dbsnp-json","description":"| Field | Type | Notes |"},"data-sources/decipher":{"id":"data-sources/decipher","title":"DECIPHER","description":"Overview","sidebar":"docs"},"data-sources/decipher-json":{"id":"data-sources/decipher-json","title":"decipher-json","description":"| Field | Type | Notes |"},"data-sources/fusioncatcher":{"id":"data-sources/fusioncatcher","title":"FusionCatcher","description":"Overview","sidebar":"docs"},"data-sources/fusioncatcher-json":{"id":"data-sources/fusioncatcher-json","title":"fusioncatcher-json","description":"| Field | Type | Notes |"},"data-sources/gerp":{"id":"data-sources/gerp","title":"GERP","description":"Overview","sidebar":"docs"},"data-sources/gerp-json":{"id":"data-sources/gerp-json","title":"gerp-json","description":"| Field | Type | Notes |"},"data-sources/gme":{"id":"data-sources/gme","title":"GME Variome","description":"Overview","sidebar":"docs"},"data-sources/gme-json":{"id":"data-sources/gme-json","title":"gme-json","description":"| Field | Type | Notes |"},"data-sources/gnomad":{"id":"data-sources/gnomad","title":"gnomAD","description":"Overview","sidebar":"docs"},"data-sources/gnomad-lof-json":{"id":"data-sources/gnomad-lof-json","title":"gnomad-lof-json","description":"| Field | Type | Notes |"},"data-sources/gnomad-small-variants-json":{"id":"data-sources/gnomad-small-variants-json","title":"gnomad-small-variants-json","description":"| Field | Type | Notes 
|"},"data-sources/gnomad-structural-variants-data_description":{"id":"data-sources/gnomad-structural-variants-data_description","title":"gnomad-structural-variants-data_description","description":"Bed Example"},"data-sources/gnomad-structural-variants-json":{"id":"data-sources/gnomad-structural-variants-json","title":"gnomad-structural-variants-json","description":"| Field | Type | Notes |"},"data-sources/mito-heteroplasmy":{"id":"data-sources/mito-heteroplasmy","title":"Mitochondrial Heteroplasmy","description":"Overview","sidebar":"docs"},"data-sources/mitomap":{"id":"data-sources/mitomap","title":"MITOMAP","description":"Overview","sidebar":"docs"},"data-sources/mitomap-small-variants-json":{"id":"data-sources/mitomap-small-variants-json","title":"mitomap-small-variants-json","description":"| Field | Type | Notes |"},"data-sources/mitomap-structural-variants-json":{"id":"data-sources/mitomap-structural-variants-json","title":"mitomap-structural-variants-json","description":"| Field | Type | Notes |"},"data-sources/omim":{"id":"data-sources/omim","title":"OMIM","description":"Overview","sidebar":"docs"},"data-sources/omim-json":{"id":"data-sources/omim-json","title":"omim-json","description":"| Field | Type | Notes |"},"data-sources/phylop":{"id":"data-sources/phylop","title":"PhyloP","description":"Overview","sidebar":"docs"},"data-sources/phylop-json":{"id":"data-sources/phylop-json","title":"phylop-json","description":"| Field | Type | Notes |"},"data-sources/primate-ai":{"id":"data-sources/primate-ai","title":"Primate AI","description":"Overview","sidebar":"docs"},"data-sources/primate-ai-json":{"id":"data-sources/primate-ai-json","title":"primate-ai-json","description":"GRCh38"},"data-sources/revel":{"id":"data-sources/revel","title":"REVEL","description":"Overview","sidebar":"docs"},"data-sources/revel-json":{"id":"data-sources/revel-json","title":"revel-json","description":"| Field | Type | Notes 
|"},"data-sources/splice-ai":{"id":"data-sources/splice-ai","title":"Splice AI","description":"Overview","sidebar":"docs"},"data-sources/splice-ai-json":{"id":"data-sources/splice-ai-json","title":"splice-ai-json","description":"| Field | Type | Notes |"},"data-sources/topmed":{"id":"data-sources/topmed","title":"TOPMed","description":"Overview","sidebar":"docs"},"data-sources/topmed-json":{"id":"data-sources/topmed-json","title":"topmed-json","description":"| Field | Type | Notes |"},"file-formats/custom-annotations":{"id":"file-formats/custom-annotations","title":"Custom Annotations","description":"Overview","sidebar":"docs"},"file-formats/illumina-annotator-json-file-format":{"id":"file-formats/illumina-annotator-json-file-format","title":"Illumina Connected Annotations JSON File Format","description":"Overview","sidebar":"docs"},"introduction/dependencies":{"id":"introduction/dependencies","title":"Dependencies","description":"All of the following dependencies have been included in this repository.","sidebar":"docs"},"introduction/getting-started":{"id":"introduction/getting-started","title":"Getting Started","description":"Illumina Connected Annotations is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.","sidebar":"docs"},"introduction/introduction":{"id":"introduction/introduction","title":"Introduction","description":"Clinical-grade variant annotation","sidebar":"docs"},"introduction/parsing-json":{"id":"introduction/parsing-json","title":"Parsing Illumina Connected Annotations JSON","description":"Parsing JSON"},"utilities/jasix":{"id":"utilities/jasix","title":"Jasix","description":"Overview","sidebar":"docs"},"utilities/sautils":{"id":"utilities/sautils","title":"SAUtils","description":"Overview","sidebar":"docs"}}}')}}]); \ No newline at end of file diff --git a/assets/js/9367ef06.5682ca4a.js b/assets/js/9367ef06.5682ca4a.js deleted file mode 100644 index 4232e9e0..00000000 --- a/assets/js/9367ef06.5682ca4a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3396],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),c=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=c(e.components);return r.createElement(p.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var 
n=e.components,a=e.mdxType,o=e.originalType,p=e.parentName,s=i(e,["components","mdxType","originalType","parentName"]),u=c(n),m=a,f=u["".concat(p,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=m;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[u]="string"==typeof e?e:a,l[1]=i;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,i={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.21/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad-lof-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],c={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n 
"loeuf":1.13e0\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pLi"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pNull"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"pRec"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"synZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"misZ"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/938c0222.08a8cf5d.js 
b/assets/js/938c0222.08a8cf5d.js deleted file mode 100644 index 0e08736a..00000000 --- a/assets/js/938c0222.08a8cf5d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2634],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>h});var i=t(67294);function a(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);n&&(i=i.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,i)}return t}function o(e){for(var n=1;n=0||(a[t]=e[t]);return a}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(a[t]=e[t])}return a}var s=i.createContext({}),c=function(e){var n=i.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var n=c(e.components);return i.createElement(s.Provider,{value:n},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return i.createElement(i.Fragment,{},n)}},d=i.forwardRef((function(e,n){var t=e.components,a=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=c(t),d=a,h=u["".concat(s,".").concat(d)]||u[d]||m[d]||r;return t?i.createElement(h,o(o({ref:n},p),{},{components:t})):i.createElement(h,o({ref:n},p))}));function h(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var r=t.length,o=new Array(r);o[0]=d;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[u]="string"==typeof e?e:a,o[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var i=t(87462),a=(t(67294),t(3905));const r={title:"Jasix"},o=void 
0,l={unversionedId:"utilities/jasix",id:"version-3.18/utilities/jasix",title:"Jasix",description:"Overview",source:"@site/versioned_docs/version-3.18/utilities/jasix.mdx",sourceDirName:"utilities",slug:"/utilities/jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/utilities/jasix",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/utilities/jasix.mdx",tags:[],version:"3.18",frontMatter:{title:"Jasix"},sidebar:"docs",previous:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/variant-ids"},next:{title:"SAUtils",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/utilities/sautils"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Creating the Jasix index",id:"creating-the-jasix-index",children:[{value:"Example",id:"example",children:[],level:3}],level:2},{value:"Querying the index",id:"querying-the-index",children:[],level:2},{value:"Extracting a section",id:"extracting-a-section",children:[],level:2}],c={toc:s},p="wrapper";function u(e){let{components:n,...t}=e;return(0,a.kt)(p,(0,i.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,a.kt)("h2",{id:"overview"},"Overview"),(0,a.kt)("p",null,"The Jasix index is aimed at providing TABIX like indexing capabilities for the Nirvana JSON output."),(0,a.kt)("h2",{id:"creating-the-jasix-index"},"Creating the Jasix index"),(0,a.kt)("p",null,"The Jasix index (that comes in a .jsi) file is generated on-the-fly with Nirvana output. It can also be generated independently by running the Jasix command line utility on the JSON output file. Please note that the Jasix utility can only consume JSON files that follow the Nirvana JSON output format. 
The following code blocks demonstrate the help menu and index generating functionalities of Jasix."),(0,a.kt)("h3",{id:"example"},"Example"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -h\nUSAGE: dotnet Jasix.dll -i in.json.gz [options]\nIndexes a Nirvana annotated JSON file\n\nOPTIONS:\n --header, -t print also the header lines\n --only-header, -H print only the header lines\n --chromosomes, -l list chromosome names\n --index, -c create index\n --in, -i input\n --out, -o compressed output file name (default:console)\n --query, -q query range\n --section, -s complete section (positions or genes) to output\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll --index -i input.json.gz\n---------------------------------------------------------------------------\nJasix (c) 2017 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 2.0.0\n---------------------------------------------------------------------------\n\nRef Sequence chrM indexed in 00:00:00.2\nRef Sequence chr1 indexed in 00:00:05.8\nRef Sequence chr2 indexed in 00:00:06.0\n.\n.\n.\nPeak memory usage: 28.5 MB\nTime: 00:01:14.8\n")),(0,a.kt)("h2",{id:"querying-the-index"},"Querying the index"),(0,a.kt)("p",null,"The Jasix query format is chr:start-end. If not provided, it assumes end=start. 
If only chr is provided, all entries for that chromosome will be provided."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz chrM:5000-7000\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n }\n ]\n}\n\n')),(0,a.kt)("p",null,'The default output stream is Console. However, if an output filename is provided, Jasix outputs the results to that file in a bgzip compressed format. The output is always a valid JSON entry. If requested (via -t option) the header of the indexed file will be provided. 
Multiple queries can be submitted in the same command and the output will contain them within the same "positions" block in order of the submitted queries (Warning: if the queries are out of order, or overlapping, the output will be out or order and intersecting).'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -q chrM:5000-7000 -q chrM:8500-9500 -t\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"Illumina Annotation Engine 1.6.2.0",\n "creationTime":"2017-08-30 11:42:57",\n "genomeAssembly":"GRCh37",\n "schemaVersion":6,\n "dataVersion":"84.24.39",\n "dataSources":[\n {\n "name":"VEP",\n "version":"84",\n "description":"Ensembl",\n "releaseDate":"2017-01-16"\n }\n ],\n "samples":[\n "Mother"\n ]\n },\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":8702,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":0.9987,\n 
"totalDepth":1534,\n "genotypeQuality":1,\n "alleleDepths":[\n 2,\n 1532\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":8702,\n "chromosome":"chrM",\n "end":8702,\n "variantType":"SNV",\n "vid":"MT:8702:A"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":9378,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1018,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1018\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":9378,\n "chromosome":"chrM",\n "end":9378,\n "variantType":"SNV",\n "vid":"MT:9378:A"\n }\n ]\n }\n ]\n}\n')),(0,a.kt)("h2",{id:"extracting-a-section"},"Extracting a section"),(0,a.kt)("p",null,"The Nirvana JSON file has three sections: header, positions and genes. Header can be printed using the -H option. If you are interested in only the positions or genes section, you can use the -s or --section option."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -s genes\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'[\n{\n "name": "ABCB10",\n "omim": [\n {\n "mimNumber": 605454,\n "geneName": "ATP-binding cassette, subfamily B, member 10"\n }\n ]\n},\n{\n "name": "ABCD3",\n "omim": [\n {\n "mimNumber": 170995,\n "geneName": "ATP-binding cassette, subfamily D, member 3 (peroxisomal membrane protein 1, 70kD)",\n "description": "The ABCD3 gene encodes a peroxisomal membrane transporter involved in the transport of branched-chain fatty acids and C27 bile acids into the peroxisome; the latter function is a crucial step in bile acid biosynthesis (summary by Ferdinandusse et al., 2015).",\n "phenotypes": [\n {\n "mimNumber": 616278,\n "phenotype": "?Bile acid synthesis defect, congenital, 5",\n "mapping": "molecular basis of the disorder is 
known",\n "inheritances": [\n "Autosomal recessive"\n ],\n "comments": [\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n ]\n}\n]\n')))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9620026c.490b8fcf.js b/assets/js/9620026c.490b8fcf.js new file mode 100644 index 00000000..835de61a --- /dev/null +++ b/assets/js/9620026c.490b8fcf.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6602],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>d});var a=n(7294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var i=a.createContext({}),m=function(t){var e=a.useContext(i),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=m(t.components);return a.createElement(i.Provider,{value:e},t.children)},f="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},s=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,i=t.parentName,c=p(t,["components","mdxType","originalType","parentName"]),f=m(n),s=r,d=f["".concat(i,".").concat(s)]||f[s]||u[s]||l;return n?a.createElement(d,o(o({ref:e},c),{},{components:n})):a.createElement(d,o({ref:e},c))}));function d(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=s;var p={};for(var i in e)hasOwnProperty.call(e,i)&&(p[i]=e[i]);p.originalType=t,p[f]="string"==typeof t?t:r,o[1]=p;for(var 
m=2;m{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>f,frontMatter:()=>l,metadata:()=>p,toc:()=>i});var a=n(7462),r=(n(7294),n(3905));const l={},o=void 0,p={unversionedId:"data-sources/1000Genomes-snv-json",id:"data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-snv-json.md",tags:[],version:"current",frontMatter:{}},i=[],m={toc:i},c="wrapper";function f(t){let{components:e,...n}=t;return(0,r.kt)(c,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. Non-zero integer.")))))}f.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9620026c.ae109651.js b/assets/js/9620026c.ae109651.js deleted file mode 100644 index eea84b7b..00000000 --- a/assets/js/9620026c.ae109651.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6602],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>d});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var i=a.createContext({}),m=function(t){var e=a.useContext(i),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=m(t.components);return 
a.createElement(i.Provider,{value:e},t.children)},f="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},s=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,i=t.parentName,c=p(t,["components","mdxType","originalType","parentName"]),f=m(n),s=r,d=f["".concat(i,".").concat(s)]||f[s]||u[s]||l;return n?a.createElement(d,o(o({ref:e},c),{},{components:n})):a.createElement(d,o({ref:e},c))}));function d(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=s;var p={};for(var i in e)hasOwnProperty.call(e,i)&&(p[i]=e[i]);p.originalType=t,p[f]="string"==typeof t?t:r,o[1]=p;for(var m=2;m{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>f,frontMatter:()=>l,metadata:()=>p,toc:()=>i});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,p={unversionedId:"data-sources/1000Genomes-snv-json",id:"data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-snv-json.md",tags:[],version:"current",frontMatter:{}},i=[],m={toc:i},c="wrapper";function f(t){let{components:e,...n}=t;return(0,r.kt)(c,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n 
"sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}f.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/962050bd.4bd65e64.js b/assets/js/962050bd.4bd65e64.js deleted file mode 100644 index b7ef6b46..00000000 --- a/assets/js/962050bd.4bd65e64.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2616],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>k});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var o=a.createContext({}),s=function(t){var e=a.useContext(o),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=s(t.components);return a.createElement(o.Provider,{value:e},t.children)},d="mdxType",c={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,o=t.parentName,m=p(t,["components","mdxType","originalType","parentName"]),d=s(n),u=r,k=d["".concat(o,".").concat(u)]||d[u]||c[u]||l;return n?a.createElement(k,i(i({ref:e},m),{},{components:n})):a.createElement(k,i({ref:e},m))}));function k(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,i=new Array(l);i[0]=u;var p={};for(var o in e)hasOwnProperty.call(e,o)&&(p[o]=e[o]);p.originalType=t,p[d]="string"==typeof t?t:r,i[1]=p;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>l,metadata:()=>p,toc:()=>o});var a=n(87462),r=(n(67294),n(3905));const 
l={},i=void 0,p={unversionedId:"data-sources/clinvar-json",id:"version-3.21/data-sources/clinvar-json",title:"clinvar-json",description:"small variants:",source:"@site/versioned_docs/version-3.21/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clinvar-json.md",tags:[],version:"3.21",frontMatter:{}},o=[],s={toc:o},m="wrapper";function d(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"small variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"large variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n "pathogenic"\n ], \n 
"lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n "significance":[\n "pathogenic"\n ],\n "lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely 
pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/973f83e7.142e5e31.js b/assets/js/973f83e7.142e5e31.js deleted file mode 100644 index 237b35ab..00000000 --- a/assets/js/973f83e7.142e5e31.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8478],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>m});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=c(n),h=r,m=d["".concat(s,".").concat(h)]||d[h]||u[h]||i;return n?a.createElement(m,o(o({ref:t},p),{},{components:n})):a.createElement(m,o({ref:t},p))}));function m(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={title:"Canonical Transcripts"},o=void 0,l={unversionedId:"core-functionality/canonical-transcripts",id:"version-3.16/core-functionality/canonical-transcripts",title:"Canonical Transcripts",description:"Overview",source:"@site/versioned_docs/version-3.16/core-functionality/canonical-transcripts.md",sourceDirName:"core-functionality",slug:"/core-functionality/canonical-transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/canonical-transcripts",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/core-functionality/canonical-transcripts.md",tags:[],version:"3.16",frontMatter:{title:"Canonical Transcripts"},sidebar:"version-3.16/docs",previous:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/file-formats/custom-annotations"},next:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/gene-fusions"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Known Algorithms",id:"known-algorithms",children:[{value:"UCSC",id:"ucsc",children:[],level:3},{value:"Ensembl",id:"ensembl",children:[],level:3},{value:"ACMG",id:"acmg",children:[],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3}],level:2},{value:"Unified 
Approach",id:"unified-approach",children:[],level:2}],c={toc:s},p="wrapper";function d(e){let{components:t,...i}=e;return(0,r.kt)(p,(0,a.Z)({},c,i,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"One of the more polarizing topics within annotation is the notion of canonical transcripts. Because of alternative splicing, we often have several transcripts for each gene. In the human genome, there are an average of 3.4 transcripts per gene (Tung, 2020). As scientists, we seem to have a need for identifying a representative example of a gene - even if there's no biological basis for the motivation."),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(86020).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Golden Helix Blog")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"A few years ago, the guys over at Golden Helix wrote an excellent post about the pitfalls and issues surrounding the identification of canonical transcripts: ",(0,r.kt)("a",{parentName:"p",href:"https://blog.goldenhelix.com/whats-in-a-name-the-intricacies-of-identifying-variants/"},"What\u2019s in a Name: The Intricacies of Identifying Variants"),"."))),(0,r.kt)("p",null,"In Nirvana, we wanted to identify an algorithm for determining the canonical transcript and apply it consistently to all of our transcript data sources."),(0,r.kt)("h2",{id:"known-algorithms"},"Known 
Algorithms"),(0,r.kt)("h3",{id:"ucsc"},"UCSC"),(0,r.kt)("p",null,"UCSC publishes a list of canonical transcripts in its ",(0,r.kt)("inlineCode",{parentName:"p"},"knownCanonical")," table which is available via the ",(0,r.kt)("a",{parentName:"p",href:"https://genome.ucsc.edu/cgi-bin/hgTables"},"TableBrowser"),". Of the RefSeq data sources, it was the only one we could find that provided canonical transcripts:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is defined as either the longest CDS, if the gene has translated transcripts, or the longest cDNA.")),(0,r.kt)("p",null,"If you were to implement this and compare it with the knownCanonical table, you would see a lot of exceptions to the rule."),(0,r.kt)("h3",{id:"ensembl"},"Ensembl"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"http://uswest.ensembl.org/Help/Glossary"},"Ensembl glossary")," states:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"The canonical transcript is used in the gene tree analysis in Ensembl and does not necessarily reflect the most biologically relevant transcript of a gene. For human, the canonical transcript for a gene is set according to the following hierarchy:"),(0,r.kt)("ol",{parentName:"blockquote"},(0,r.kt)("li",{parentName:"ol"},"Longest CCDS translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (1), choose the longest Ensembl/Havana merged translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no (2), choose the longest translation with no stop codons."),(0,r.kt)("li",{parentName:"ol"},"If no translation, choose the longest non-protein-coding transcript."))),(0,r.kt)("h3",{id:"acmg"},"ACMG"),(0,r.kt)("p",null,"From the ACMG Guidelines for the Interpretation of Sequence Variants:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"A reference transcript for each gene should be used and provided in the report when describing coding variants. 
The transcript should represent either the longest known transcript and/or the most clinically relevant transcript.")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)("p",null,"From the ClinVar paper:"),(0,r.kt)("blockquote",null,(0,r.kt)("p",{parentName:"blockquote"},"When there are multiple transcripts for a gene, ClinVar selects one HGVS expression to construct a preferred name. By default, this selection is based on the first reference standard transcript identified by the RefSeqGene/LRG (Locus Reference Genomic) collaboration.")),(0,r.kt)("h2",{id:"unified-approach"},"Unified Approach"),(0,r.kt)("p",null,"Our approach is almost identical to the one Golden Helix discussed in their article:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"If we're looking at RefSeq, only consider NM & NR transcripts as candidates for canonical transcripts."),(0,r.kt)("li",{parentName:"ol"},"Sort the transcripts in the following order:",(0,r.kt)("ol",{parentName:"li"},(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://www.lrg-sequence.org/"},"Locus Reference Genomic (LRG)")," entries occur before non-LRG entries"),(0,r.kt)("li",{parentName:"ol"},"Descending CDS length"),(0,r.kt)("li",{parentName:"ol"},"Descending transcript length"),(0,r.kt)("li",{parentName:"ol"},"Ascending accession number"))),(0,r.kt)("li",{parentName:"ol"},"Grab the first entry")))}d.isMDXComponent=!0},86020:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/hk1-transcripts-a5b85474d3b002553687715dbd004907.png"}}]); \ No newline at end of file diff --git a/assets/js/9850fae3.e6d022c5.js b/assets/js/9850fae3.e6d022c5.js deleted file mode 100644 index 7631e125..00000000 --- a/assets/js/9850fae3.e6d022c5.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2146],{3905:(t,n,e)=>{e.d(n,{Zo:()=>m,kt:()=>k});var a=e(67294);function l(t,n,e){return n in 
t?Object.defineProperty(t,n,{value:e,enumerable:!0,configurable:!0,writable:!0}):t[n]=e,t}function r(t,n){var e=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable}))),e.push.apply(e,a)}return e}function o(t){for(var n=1;n=0||(l[e]=t[e]);return l}(t,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,e)&&(l[e]=t[e])}return l}var p=a.createContext({}),u=function(t){var n=a.useContext(p),e=n;return t&&(e="function"==typeof t?t(n):o(o({},n),t)),e},m=function(t){var n=u(t.components);return a.createElement(p.Provider,{value:n},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var n=t.children;return a.createElement(a.Fragment,{},n)}},N=a.forwardRef((function(t,n){var e=t.components,l=t.mdxType,r=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(e),N=l,k=d["".concat(p,".").concat(N)]||d[N]||g[N]||r;return e?a.createElement(k,o(o({ref:n},m),{},{components:e})):a.createElement(k,o({ref:n},m))}));function k(t,n){var e=arguments,l=n&&n.mdxType;if("string"==typeof t||l){var r=e.length,o=new Array(r);o[0]=N;var i={};for(var p in n)hasOwnProperty.call(n,p)&&(i[p]=n[p]);i.originalType=t,i[d]="string"==typeof t?t:l,o[1]=i;for(var u=2;u{e.r(n),e.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>i,toc:()=>p});var a=e(87462),l=(e(67294),e(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.21/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],u={toc:p},m="wrapper";function d(t){let{components:n,...e}=t;return(0,l.kt)(m,(0,a.Z)({},u,e,{components:n,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n 
"failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. 
Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/988d0ae8.535839c4.js b/assets/js/988d0ae8.535839c4.js deleted file mode 100644 index 4570d993..00000000 --- a/assets/js/988d0ae8.535839c4.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[472,6192],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),u=d(n),m=r,v=u["".concat(s,".").concat(m)]||u[m]||p[m]||o;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/dann-json",id:"data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dann-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},c="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 0.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 
1.0")))))}u.isMDXComponent=!0},95771:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(40540);const i={title:"DANN"},l=void 0,s={unversionedId:"data-sources/dann",id:"data-sources/dann",title:"DANN",description:"Overview",source:"@site/docs/data-sources/dann.mdx",sourceDirName:"data-sources",slug:"/data-sources/dann",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dann.mdx",tags:[],version:"current",frontMatter:{title:"DANN"},sidebar:"docs",previous:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic"},next:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"TSV File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"GRCh38 liftover",id:"grch38-liftover",children:[],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},u="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"DANN uses the same feature set and training data as CADD (Combined Annotation-Dependent Depletion) to train a deep neural network (DNN).\nCADD is an algorithm designed to annotate both coding and non-coding variants, and has been shown to outperform other annotation algorithms.\nDANN improves on CADD (which uses Support Vector Machines (SVMs)) by capturing non-linear relationships by using a deep neural network instead of SVMs.\nDANN achieves about a 19% relative reduction in 
the error rate and about a 14% relative increase in the area under the curve (AUC) metric over CADD\u2019s SVM methodology."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Quang, Daniel, Yifei Chen, and Xiaohui Xie. DANN: a deep learning approach for annotating the pathogenicity of genetic variants. ",(0,r.kt)("em",{parentName:"p"},"Bioinformatics")," ",(0,r.kt)("strong",{parentName:"p"},"31.5")," 761-763 (2015). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/bioinformatics/btu703"},"https://doi.org/10.1093/bioinformatics/btu703")))),(0,r.kt)("h2",{id:"tsv-file"},"TSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr grch37_pos ref alt DANN\n1 10001 T A 0.16461391399220135\n1 10001 T C 0.4396994049749739\n1 10001 T G 0.38108629377072734\n1 10002 A C 0.36182020272810128\n1 10002 A G 0.44413258111779291\n1 10002 A T 0.16812846819989813\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we are interested in all columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"grch37_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DANN"))),(0,r.kt)("h2",{id:"grch38-liftover"},"GRCh38 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh38 on DANN website. 
We performed a liftover from GRCh37 to GRCh38 using crossmap."),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("p",null,"None"),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://cbcl.ics.uci.edu/public_data/DANN/"},"https://cbcl.ics.uci.edu/public_data/DANN/")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/988d0ae8.e10fbff5.js b/assets/js/988d0ae8.e10fbff5.js new file mode 100644 index 00000000..74396fa6 --- /dev/null +++ b/assets/js/988d0ae8.e10fbff5.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[472,6192],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(7294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),u=d(n),m=r,v=u["".concat(s,".").concat(m)]||u[m]||p[m]||o;return 
n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(7462),r=(n(7294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/dann-json",id:"data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dann-json.md",tags:[],version:"current",frontMatter:{}},s=[],d={toc:s},c="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 0.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1.0")))))}u.isMDXComponent=!0},5771:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(7462),r=(n(7294),n(3905)),o=n(540);const i={title:"DANN"},l=void 
0,s={unversionedId:"data-sources/dann",id:"data-sources/dann",title:"DANN",description:"Overview",source:"@site/docs/data-sources/dann.mdx",sourceDirName:"data-sources",slug:"/data-sources/dann",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dann.mdx",tags:[],version:"current",frontMatter:{title:"DANN"},sidebar:"docs",previous:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic"},next:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"TSV File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"GRCh38 liftover",id:"grch38-liftover",children:[],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},u="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"DANN uses the same feature set and training data as CADD (Combined Annotation-Dependent Depletion) to train a deep neural network (DNN).\nCADD is an algorithm designed to annotate both coding and non-coding variants, and has been shown to outperform other annotation algorithms.\nDANN improves on CADD (which uses Support Vector Machines (SVMs)) by capturing non-linear relationships by using a deep neural network instead of SVMs.\nDANN achieves about a 19% relative reduction in the error rate and about a 14% relative increase in the area under the curve (AUC) metric over CADD\u2019s SVM methodology."),(0,r.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Quang, Daniel, Yifei Chen, and Xiaohui Xie. DANN: a deep learning approach for annotating the pathogenicity of genetic variants. ",(0,r.kt)("em",{parentName:"p"},"Bioinformatics")," ",(0,r.kt)("strong",{parentName:"p"},"31.5")," 761-763 (2015). ",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/bioinformatics/btu703"},"https://doi.org/10.1093/bioinformatics/btu703")))),(0,r.kt)("h2",{id:"tsv-file"},"TSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr grch37_pos ref alt DANN\n1 10001 T A 0.16461391399220135\n1 10001 T C 0.4396994049749739\n1 10001 T G 0.38108629377072734\n1 10002 A C 0.36182020272810128\n1 10002 A G 0.44413258111779291\n1 10002 A T 0.16812846819989813\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we are interested in all 
columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"grch37_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DANN"))),(0,r.kt)("h2",{id:"grch38-liftover"},"GRCh38 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh38 on DANN website. We performed a liftover from GRCh37 to GRCh38 using crossmap."),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("p",null,"None"),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://cbcl.ics.uci.edu/public_data/DANN/"},"https://cbcl.ics.uci.edu/public_data/DANN/")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/98bbf06c.35018af1.js b/assets/js/98bbf06c.35018af1.js deleted file mode 100644 index 1ee6cdd0..00000000 --- a/assets/js/98bbf06c.35018af1.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4858,9082,7860,4105,3805],{3905:(t,e,n)=>{n.d(e,{Zo:()=>s,kt:()=>g});var a=n(67294);function l(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function r(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(l[n]=t[n]);return l}(t,e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(l[n]=t[n])}return l}var 
p=a.createContext({}),m=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},s=function(t){var e=m(t.components);return a.createElement(p.Provider,{value:e},t.children)},u="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},N=a.forwardRef((function(t,e){var n=t.components,l=t.mdxType,r=t.originalType,p=t.parentName,s=o(t,["components","mdxType","originalType","parentName"]),u=m(n),N=l,g=u["".concat(p,".").concat(N)]||u[N]||d[N]||r;return n?a.createElement(g,i(i({ref:e},s),{},{components:n})):a.createElement(g,i({ref:e},s))}));function g(t,e){var n=arguments,l=e&&e.mdxType;if("string"==typeof t||l){var r=n.length,i=new Array(r);i[0]=N;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[u]="string"==typeof t?t:l,i[1]=o;for(var m=2;m{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-lof-json",id:"data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-lof-json.md",tags:[],version:"current",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(t){let{components:e,...n}=t;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n 
"loeuf":1.13e0\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction 
(LOEUF)")))))}u.isMDXComponent=!0},73827:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-small-variants-json",id:"data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-small-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(t){let{components:e,...n}=t;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n 
"failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. 
Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}u.isMDXComponent=!0},36335:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-data_description",id:"data-sources/gnomad-structural-variants-data_description",title:"gnomad-structural-variants-data_description",description:"Bed Example",source:"@site/docs/data-sources/gnomad-structural-variants-data_description.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-data_description",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-data_description",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-structural-variants-data_description.md",tags:[],version:"current",frontMatter:{}},p=[{value:"Bed Example",id:"bed-example",children:[],level:4},{value:"TSV Example",id:"tsv-example",children:[],level:4},{value:"Structural Variant Type Mapping",id:"structural-variant-type-mapping",children:[],level:4}],m={toc:p},s="wrapper";function u(t){let{components:e,...n}=t;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h4",{id:"bed-example"},"Bed Example"),(0,l.kt)("p",null,"The bed file was obtained from original source for 
GRCh37"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#chrom start end name svtype ALGORITHMS BOTHSIDES_SUPPORT CHR2 CPX_INTERVALS CPX_TYPE END2 ENDEVIDENCE HIGH_SR_BACKGROUND PCRPLUS_DEPLETED PESR_GT_OVERDISPERSION POS2 PROTEIN_CODING__COPY_GAIN PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC PROTEIN_CODING__INTRONIC PROTEIN_CODING__INV_SPAN PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER PROTEIN_CODING__UTR SOURCE STRANDS SVLEN SVTYPE UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN AC AF N_BI_GENOS N_HOMREF N_HET N_HOMALT FREQ_HOMREF FREQ_HET FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF MALE_N_HET MALE_N_HOMALT MALE_FREQ_HOMREF MALE_FREQ_HET MALE_FREQ_HOMALT MALE_N_HEMIREF MALE_N_HEMIALT MALE_FREQ_HEMIREF MALE_FREQ_HEMIALT PAR FEMALE_AN FEMALE_AC FEMALE_AF FEMALE_N_BI_GENOS FEMALE_N_HOMREF FEMALE_N_HET FEMALE_N_HOMALT FEMALE_FREQ_HOMREF FEMALE_FREQ_HET FEMALE_FREQ_HOMALT POPMAX_AF AFR_AN AFR_AC AFR_AF AFR_N_BI_GENOS AFR_N_HOMREF AFR_N_HET AFR_N_HOMALT AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF AFR_MALE_N_HET AFR_MALE_N_HOMALT AFR_MALE_FREQ_HOMREF AFR_MALE_FREQ_HET AFR_MALE_FREQ_HOMALT AFR_MALE_N_HEMIREF AFR_MALE_N_HEMIALT AFR_MALE_FREQ_HEMIREF AFR_MALE_FREQ_HEMIALT AFR_FEMALE_AN AFR_FEMALE_AC AFR_FEMALE_AF AFR_FEMALE_N_BI_GENOS AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT AMR_AN AMR_AC AMR_AF AMR_N_BI_GENOS AMR_N_HOMREF AMR_N_HET AMR_N_HOMALT AMR_FREQ_HOMREF AMR_FREQ_HET AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS AMR_MALE_N_HOMREF AMR_MALE_N_HET AMR_MALE_N_HOMALT AMR_MALE_FREQ_HOMREF AMR_MALE_FREQ_HET AMR_MALE_FREQ_HOMALT AMR_MALE_N_HEMIREF AMR_MALE_N_HEMIALT AMR_MALE_FREQ_HEMIREF AMR_MALE_FREQ_HEMIALT AMR_FEMALE_AN AMR_FEMALE_AC 
AMR_FEMALE_AF AMR_FEMALE_N_BI_GENOS AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT EAS_AN EAS_AC EAS_AF EAS_N_BI_GENOS EAS_N_HOMREF EAS_N_HET EAS_N_HOMALT EAS_FREQ_HOMREF EAS_FREQ_HET EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF EAS_MALE_N_HET EAS_MALE_N_HOMALT EAS_MALE_FREQ_HOMREF EAS_MALE_FREQ_HET EAS_MALE_FREQ_HOMALT EAS_MALE_N_HEMIREF EAS_MALE_N_HEMIALT EAS_MALE_FREQ_HEMIREF EAS_MALE_FREQ_HEMIALT EAS_FEMALE_AN EAS_FEMALE_AC EAS_FEMALE_AF EAS_FEMALE_N_BI_GENOS EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT EUR_AN EUR_AC EUR_AF EUR_N_BI_GENOS EUR_N_HOMREF EUR_N_HET EUR_N_HOMALT EUR_FREQ_HOMREF EUR_FREQ_HET EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF EUR_MALE_N_HET EUR_MALE_N_HOMALT EUR_MALE_FREQ_HOMREF EUR_MALE_FREQ_HET EUR_MALE_FREQ_HOMALT EUR_MALE_N_HEMIREF EUR_MALE_N_HEMIALT EUR_MALE_FREQ_HEMIREF EUR_MALE_FREQ_HEMIALT EUR_FEMALE_AN EUR_FEMALE_AC EUR_FEMALE_AF EUR_FEMALE_N_BI_GENOS EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT OTH_AN OTH_AC OTH_AF OTH_N_BI_GENOS OTH_N_HOMREF OTH_N_HET OTH_N_HOMALT OTH_FREQ_HOMREF OTH_FREQ_HET OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF OTH_MALE_N_HET OTH_MALE_N_HOMALT OTH_MALE_FREQ_HOMREF OTH_MALE_FREQ_HET OTH_MALE_FREQ_HOMALT OTH_MALE_N_HEMIREF OTH_MALE_N_HEMIALT OTH_MALE_FREQ_HEMIREF OTH_MALE_FREQ_HEMIALT OTH_FEMALE_AN OTH_FEMALE_AC OTH_FEMALE_AF OTH_FEMALE_N_BI_GENOS OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET OTH_FEMALE_N_HOMALT OTH_FEMALE_FREQ_HOMREF OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT FILTER\n1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False 
False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 0.017857100814580917 0.0UNRESOLVED \n")),(0,l.kt)("h4",{id:"tsv-example"},"TSV Example"),(0,l.kt)("p",null,"The tsv was obtained from lifted over dataset created by dbVar for 
GRCh38"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#variant_call_accession variant_call_id variant_call_type experiment_id sample_id sampleset_id assembly chrcontig outer_start start inner_start inner_stop stop outer_stop insertion_length variant_region_acc variant_region_id copy_number description validation zygosity origin phenotype hgvs_name placement_method placement_rank placements_per_assembly remap_alignment remap_best_within_cluster remap_coverage remap_diff_chr remap_failure_code allele_count allele_frequency allele_number\nnssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\n")),(0,l.kt)("h4",{id:"structural-variant-type-mapping"},"Structural Variant Type Mapping"),(0,l.kt)("p",null,"The source files represented the structural variants with keys using various naming conventions.\nIn the Illumina Connected Annotations JSON output, these keys will be mapped according to the following. 
"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Illumina Connected Annotations JSON SV Type Key"),(0,l.kt)("th",{parentName:"tr",align:null},"GRCh37 Source SV Type Key"),(0,l.kt)("th",{parentName:"tr",align:null},"GRCh38 Source SV Type Key"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"copy_number_variation"),(0,l.kt)("td",{parentName:"tr",align:null}),(0,l.kt)("td",{parentName:"tr",align:null},"copy number variation")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"deletion"),(0,l.kt)("td",{parentName:"tr",align:null},"DEL, CN=0"),(0,l.kt)("td",{parentName:"tr",align:null},"deletion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"duplication"),(0,l.kt)("td",{parentName:"tr",align:null},"DUP"),(0,l.kt)("td",{parentName:"tr",align:null},"duplication")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS"),(0,l.kt)("td",{parentName:"tr",align:null},"insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"inversion"),(0,l.kt)("td",{parentName:"tr",align:null},"INV"),(0,l.kt)("td",{parentName:"tr",align:null},"inversion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME"),(0,l.kt)("td",{parentName:"tr",align:null},"mobile element insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:ALU"),(0,l.kt)("td",{parentName:"tr",align:null},"alu 
insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:LINE1"),(0,l.kt)("td",{parentName:"tr",align:null},"line1 insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:SVA"),(0,l.kt)("td",{parentName:"tr",align:null},"sva insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"structural alteration"),(0,l.kt)("td",{parentName:"tr",align:null}),(0,l.kt)("td",{parentName:"tr",align:null},"sequence alteration")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"complex_structural_alteration"),(0,l.kt)("td",{parentName:"tr",align:null},"CPX"),(0,l.kt)("td",{parentName:"tr",align:null})))))}u.isMDXComponent=!0},90818:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-json",id:"data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(t){let{components:e,...n}=t;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": [\n {\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n 
"variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n "afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n }\n]\n\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"chromosome number")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"begin"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"end"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"variantType"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"structural variant 
type")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"variantId"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"gnomAD ID")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"boolean"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. 
Range: 0 - 1.0")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,l.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}u.isMDXComponent=!0},61106:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>s,default:()=>c,frontMatter:()=>m,metadata:()=>u,toc:()=>d});var a=n(87462),l=(n(67294),n(3905)),r=n(73827),i=n(74859),o=n(90818),p=n(36335);const 
m={title:"gnomAD"},s=void 0,u={unversionedId:"data-sources/gnomad",id:"data-sources/gnomad",title:"gnomAD",description:"Overview",source:"@site/docs/data-sources/gnomad.mdx",sourceDirName:"data-sources",slug:"/data-sources/gnomad",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad.mdx",tags:[],version:"current",frontMatter:{title:"gnomAD"},sidebar:"docs",previous:{title:"GME Variome",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme"},next:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-heteroplasmy"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF extraction",id:"vcf-extraction",children:[],level:3},{value:"Computation",id:"computation",children:[],level:3},{value:"Merging genomes and exomes",id:"merging-genomes-and-exomes",children:[],level:3},{value:"Filters",id:"filters",children:[],level:3},{value:"VCF download instructions",id:"vcf-download-instructions",children:[],level:3},{value:"JSON output",id:"json-output",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[{value:"Source data files",id:"source-data-files",children:[],level:4}],level:3}],level:2},{value:"LoF Gene Metrics",id:"lof-gene-metrics",children:[{value:"Tab delimited file example",id:"tab-delimited-file-example",children:[],level:3},{value:"JSON key to TSV column mapping",id:"json-key-to-tsv-column-mapping",children:[],level:3},{value:"Gene symbol update",id:"gene-symbol-update",children:[],level:3},{value:"Conflict resolution",id:"conflict-resolution",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON output",id:"json-output-1",children:[],level:3}],level:2},{value:"Structural 
Variants",id:"structural-variants",children:[{value:"Source Files",id:"source-files",children:[],level:3},{value:"Download URLs",id:"download-urls",children:[{value:"GRCh37",id:"grch37",children:[],level:4},{value:"GRCh38",id:"grch38",children:[],level:4},{value:"Download URL",id:"download-url-1",children:[],level:4}],level:3},{value:"JSON output",id:"json-output-2",children:[],level:3}],level:2}],N={toc:d},g="wrapper";function c(t){let{components:e,...n}=t;return(0,l.kt)(g,(0,a.Z)({},N,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"The Genome Aggregation Database (",(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/"},"gnomAD"),") is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Koch, L., 2020. Exploring human genomic diversity with gnomAD. 
",(0,l.kt)("em",{parentName:"p"},"Nature Reviews Genetics"),", ",(0,l.kt)("strong",{parentName:"p"},"21(8)"),", pp.448-448."))),(0,l.kt)("h2",{id:"small-variants"},"Small Variants"),(0,l.kt)("h3",{id:"vcf-extraction"},"VCF extraction"),(0,l.kt)("p",null,"We currently extract the following info fields from gnomAD genome and exome VCF files:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("p",null,"We also extract the following extra fields from gnomAD exome VCF file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("h3",{id:"computation"},"Computation"),(0,l.kt)("p",null,"Using these, we compute the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Coverage"),(0,l.kt)("li",{parentName:"ul"},"Allele count, Homozygous count, allele number and allele frequencies for:"),(0,l.kt)("li",{parentName:"ul"},"Global population"),(0,l.kt)("li",{parentName:"ul"},"African/African Americans"),(0,l.kt)("li",{parentName:"ul"},"Admixed Americans"),(0,l.kt)("li",{parentName:"ul"},"Ashkenazi Jews"),(0,l.kt)("li",{parentName:"ul"},"East Asians"),(0,l.kt)("li",{parentName:"ul"},"Finnish"),(0,l.kt)("li",{parentName:"ul"},"Non-Finnish Europeans"),(0,l.kt)("li",{parentName:"ul"},"South Asian"),(0,l.kt)("li",{parentName:"ul"},"Others (population not assigned)"),(0,l.kt)("li",{parentName:"ul"},"Male"),(0,l.kt)("li",{parentName:"ul"},"Female"),(0,l.kt)("li",{parentName:"ul"},"Controls")),(0,l.kt)("div",{className:"admonition admonition-tip alert 
alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Coverage = DP / AN. Frequencies are computed using AC/AN for each population."),(0,l.kt)("li",{parentName:"ul"},"Please note that currently there is no genome sequencing data of south asian (SAS) population available in gnomAD."),(0,l.kt)("li",{parentName:"ul"},"Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.")))),(0,l.kt)("h3",{id:"merging-genomes-and-exomes"},"Merging genomes and exomes"),(0,l.kt)("p",null,"When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 
5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"For GRCh37, Illumina Connected Annotations currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output."),(0,l.kt)("li",{parentName:"ul"},"For GRCh38, Illumina Connected Annotations currently uses gnomAD version 3.0 which doesn't contain the exomes data. Therefore, only genomes data are presented in the output.")))),(0,l.kt)("h3",{id:"filters"},"Filters"),(0,l.kt)("p",null,"The following strategy will be used when there's a conflict in filter status:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"center"}),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes PASS")),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes Filtered")))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes PASS")),(0,l.kt)("td",{parentName:"tr",align:"center"},"PASS"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use exome data")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes Filtered")),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use genome data"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Filtered")))),(0,l.kt)("h3",{id:"vcf-download-instructions"},"VCF download 
instructions"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/downloads"},"https://gnomad.broadinstitute.org/downloads")),(0,l.kt)("h3",{id:"json-output"},"JSON output"),(0,l.kt)(r.default,{mdxType:"JSONV"}),(0,l.kt)("h3",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,l.kt)("p",null,"The gnomAD ",(0,l.kt)("inlineCode",{parentName:"p"},".nsa")," for Illumina Connected Annotations can be built using the ",(0,l.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,l.kt)("inlineCode",{parentName:"p"},"gnomad")," subcommand. We will describe building gnomAD version 3.1 here."),(0,l.kt)("h4",{id:"source-data-files"},"Source data files"),(0,l.kt)("p",null,"Input VCF files (one per chromosome) and a ",(0,l.kt)("inlineCode",{parentName:"p"},".version")," file are required in a folder to build the ",(0,l.kt)("inlineCode",{parentName:"p"},".nsa")," file. For example, my directory contains:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr10.vcf.bgz chr22.vcf.bgz\nchr11.vcf.bgz chr2.vcf.bgz\nchr12.vcf.bgz chr3.vcf.bgz\nchr13.vcf.bgz chr4.vcf.bgz\nchr14.vcf.bgz chr5.vcf.bgz\nchr15.vcf.bgz chr6.vcf.bgz\nchr16.vcf.bgz chr7.vcf.bgz\nchr17.vcf.bgz chr8.vcf.bgz\nchr18.vcf.bgz chr9.vcf.bgz\nchr19.vcf.bgz chrM.vcf.bgz\nchr1.vcf.bgz chrX.vcf.bgz\nchr20.vcf.bgz chrY.vcf.bgz\nchr21.vcf.bgz gnomad.r3.1.version\n")),(0,l.kt)("p",null,"The version file is a text file with the following content."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=gnomAD\nVERSION=3.1\nDATE=2020-10-29\nDESCRIPTION=Allele frequencies from Genome Aggregation Database (gnomAD)\n")),(0,l.kt)("p",null,"The help menu for the utility is as follows:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"SAUtils.dll gnomad\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, 
Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll gnomad [options]\nReads provided supplementary data files and populates tsv files\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --genome, -g input directory containing VCF (and .version)\n files with genomic frequencies\n --exome, -e input directory containing VCF (and .version)\n files with exomic frequencies\n --temp, -t output temp directory for intermediate (per chrom)\n NSA files\n --out, -o output directory for NSA file\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,l.kt)("p",null,"Here is a sample execution:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll Gnomad \\\\\n--ref ~/References/7/Homo_sapiens.GRCh38.Nirvana.dat --genome genomes/ \\\\\n--out ~/SupplementaryDatabase/63/GRCh38 --temp ~/ExternalDataSources/gnomAD/3.1/GRCh38/temp\n")),(0,l.kt)("h2",{id:"lof-gene-metrics"},"LoF Gene Metrics"),(0,l.kt)("h3",{id:"tab-delimited-file-example"},"Tab delimited file example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_zmis_z lof_z oe_lof_upper_rank oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfep_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof 
exac_exp_lof exac_oe_lof brain_expression chromosome start_positionend_position\nMED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643\n")),(0,l.kt)("h3",{id:"json-key-to-tsv-column-mapping"},"JSON key to TSV column mapping"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"JSON key"),(0,l.kt)("th",{parentName:"tr",align:null},"TSV column"),(0,l.kt)("th",{parentName:"tr",align:null},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"pLI"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like 
recessive genes, observed ~ 0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"syn_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"mis_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"oe_lof_upper"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))),(0,l.kt)("h3",{id:"gene-symbol-update"},"Gene symbol update"),(0,l.kt)("p",null,"The input file provides Ensembl gene ids for each entry. We observed that they were unique while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene Ids are more stable, and Illumina Connected Annotations transcript cache data contains Ensembl gene ids, we use these ids to extract the gene symbols from the transcript cache. For example, if ENSG0001 has gene symbol GENE1 in the input but Illumina Connected Annotations cache say ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry."),(0,l.kt)("h3",{id:"conflict-resolution"},"Conflict resolution"),(0,l.kt)("p",null,"gnomAD uses Ensembl GeneID as unique identifiers in the ",(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"source file")," but Illumina Connected Annotations uses HGNC gene symbols. 
Multiple Ensembl GeneIDs can map to the same HGNC symbol and therefore may result is conflict."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"MDGA2 ENST00000426342 306 4.0043e+02 7.6419e-01 2.1096e-05 4724 78 1.6525e+02 4.7202e-01 1923 125 1.3737e+02 9.0993e-01 7.1973e-06 1413 4 2.0926e-06 453 3.8316e+01 9.9922e-01 8.6490e-12 7.8128e-04 1.0440e-01 7.8600e-01 1.0560e+00 6.9500e-01 8.4000e-01 5.0000e-02 2.3900e-01 8.2988e-01 1.6769e+00 5.1372e+00 1529 0 0 7 2.8103e-05 4.0317e-06 124784 7 0 124791 2.8047e-05 9.8167e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5391e-05 1.6672e-04 3.2680e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5308e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000139915 2 2181 13 protein_coding 835332 9.9322e-01 3 2.7833e+01 1.0779e-01 NA 14 47308826 48144157\nMDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 47311134 48143999\n")),(0,l.kt)("p",null,'In such cases, Illumina Connected Annotations chooses the entry with the smallest "LOEUF" value. 
The reason for choosing this value can be highlighted by the following table:'),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"right"},"LOEUF decile"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Haplo-insufficient"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Dominant"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Recessive"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Olfactory Genes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"0-10%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"104"),(0,l.kt)("td",{parentName:"tr",align:"right"},"140"),(0,l.kt)("td",{parentName:"tr",align:"right"},"36"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"10-20%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"47"),(0,l.kt)("td",{parentName:"tr",align:"right"},"128"),(0,l.kt)("td",{parentName:"tr",align:"right"},"72"),(0,l.kt)("td",{parentName:"tr",align:"right"},"1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"20-30%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"17"),(0,l.kt)("td",{parentName:"tr",align:"right"},"86"),(0,l.kt)("td",{parentName:"tr",align:"right"},"112"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"30-40%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"8"),(0,l.kt)("td",{parentName:"tr",align:"right"},"80"),(0,l.kt)("td",{parentName:"tr",align:"right"},"173"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"40-50%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"7"),(0,l.kt)("td",{parentName:"tr",align:"right"},"65"),(0,l.kt)("td",{parentName:"tr",align:"right"},"206"),(0,l.kt
)("td",{parentName:"tr",align:"right"},"8")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"50-60%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4"),(0,l.kt)("td",{parentName:"tr",align:"right"},"54"),(0,l.kt)("td",{parentName:"tr",align:"right"},"207"),(0,l.kt)("td",{parentName:"tr",align:"right"},"6")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"60-70%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"46"),(0,l.kt)("td",{parentName:"tr",align:"right"},"154"),(0,l.kt)("td",{parentName:"tr",align:"right"},"18")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"70-80%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"2"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49"),(0,l.kt)("td",{parentName:"tr",align:"right"},"120"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"80-90%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"34"),(0,l.kt)("td",{parentName:"tr",align:"right"},"58"),(0,l.kt)("td",{parentName:"tr",align:"right"},"96")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"90-100%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"26"),(0,l.kt)("td",{parentName:"tr",align:"right"},"40"),(0,l.kt)("td",{parentName:"tr",align:"right"},"174")))),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 
5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Table source: ",(0,l.kt)("a",{parentName:"li",href:"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf"},"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf")),(0,l.kt)("li",{parentName:"ul"},"This table indicates that lower LOEUF scores have more deleterious effect on genes."),(0,l.kt)("li",{parentName:"ul"},"Only 15 out of 19685 genes have conflicting entries.")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"List of genes with conflicting entries")),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'MDGA2:\n {"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}\n {"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}\nCRYBG3:\n {"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}\n {"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}\nCHTF8:\n {"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}\n {"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}\nSEPT1:\n {"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}\n {"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}\nARL14EPL:\n {"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}\n {"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}\nUGT2A1:\n {"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}\n 
{"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}\nLTB4R2:\n {"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}\n {"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}\nCDRT1:\n {"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}\n {"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}\nMUC3A:\n {"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}\n {"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}\nCOG8:\n {"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}\n {"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}\nAC006486.1:\n {"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}\n {"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}\nAL645922.1:\n {"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}\n {"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}\nNBPF20:\n {"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}\n {"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}\nPRAMEF11:\n {"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}\n {"synZ":-3.33e0,"misZ":-2.59e0}\nFAM231D:\n {"synZ":-1.98e0,"misZ":-1.44e0}\n {"synZ":1.07e0,"misZ":3.13e-1}\n')),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Conflict resolution")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Pick the entry with the lowest LOEUF score"),(0,l.kt)("li",{parentName:"ul"},"If the same, pick the lowest pLI"),(0,l.kt)("li",{parentName:"ul"},"Otherwise pick 
the entry with the max absolute value of synZ + misZ")),(0,l.kt)("h3",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz")),(0,l.kt)("h3",{id:"json-output-1"},"JSON output"),(0,l.kt)(i.default,{mdxType:"JSONG"}),(0,l.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Collins, R.L., Brand, H., Karczewski, K.J. et al. 2020. A structural variation reference for medical and population genetics. ",(0,l.kt)("em",{parentName:"p"},"Nature")," ",(0,l.kt)("strong",{parentName:"p"},"581"),", pp.444\u2013451. 
",(0,l.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/s41586-020-2287-8"},"https://doi.org/10.1038/s41586-020-2287-8")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Note"),"\nThe gnomAD structural variant annotations are in a preview stage at the moment.\nCurrently, the annotations do not include translocation breakends.\nFuture updates will include a better way of annotating the structural variants."),(0,l.kt)("h3",{id:"source-files"},"Source Files"),(0,l.kt)(p.default,{mdxType:"SVDATADESCRIPTION"}),(0,l.kt)("h3",{id:"download-urls"},"Download URLs"),(0,l.kt)("h4",{id:"grch37"},"GRCh37"),(0,l.kt)("p",null,"The GRCh37 file was downloaded from the original source. Following table gives some essential data metrics:"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.bed.gz"},"https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.bed.gz")),(0,l.kt)("h4",{id:"grch38"},"GRCh38"),(0,l.kt)("p",null,"Note: The data was unavailable from gnomAD 2.1 original source, however the lifted over structural variant dataset was created by dbVar and was obtained from them ",(0,l.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/sites/dbvarapp/studies/nstd166/"},"https://www.ncbi.nlm.nih.gov/sites/dbvarapp/studies/nstd166/"),"."),(0,l.kt)("h4",{id:"download-url-1"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/tsv/nstd166.GRCh38.variant_call.tsv.gz"},"https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/tsv/nstd166.GRCh38.variant_call.tsv.gz")),(0,l.kt)("h3",{id:"json-output-2"},"JSON output"),(0,l.kt)(o.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/98bbf06c.9e455330.js b/assets/js/98bbf06c.9e455330.js new file mode 100644 index 00000000..b50a5635 --- /dev/null +++ 
b/assets/js/98bbf06c.9e455330.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4858,9082,7860,4105,3805],{3905:(t,e,n)=>{n.d(e,{Zo:()=>s,kt:()=>g});var a=n(7294);function l(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function r(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(l[n]=t[n]);return l}(t,e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(l[n]=t[n])}return l}var p=a.createContext({}),m=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},s=function(t){var e=m(t.components);return a.createElement(p.Provider,{value:e},t.children)},u="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},N=a.forwardRef((function(t,e){var n=t.components,l=t.mdxType,r=t.originalType,p=t.parentName,s=o(t,["components","mdxType","originalType","parentName"]),u=m(n),N=l,g=u["".concat(p,".").concat(N)]||u[N]||d[N]||r;return n?a.createElement(g,i(i({ref:e},s),{},{components:n})):a.createElement(g,i({ref:e},s))}));function g(t,e){var n=arguments,l=e&&e.mdxType;if("string"==typeof t||l){var r=n.length,i=new Array(r);i[0]=N;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[u]="string"==typeof t?t:l,i[1]=o;for(var m=2;m{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(7462),l=(n(7294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-lof-json",id:"data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-lof-json.md",tags:[],version:"current",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(t){let{components:e,...n}=t;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 
0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}u.isMDXComponent=!0},3827:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(7462),l=(n(7294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-small-variants-json",id:"data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-small-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(t){let{components:e,...n}=t;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n 
"amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. 
Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}u.isMDXComponent=!0},6335:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(7462),l=(n(7294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-data_description",id:"data-sources/gnomad-structural-variants-data_description",title:"gnomad-structural-variants-data_description",description:"Bed Example",source:"@site/docs/data-sources/gnomad-structural-variants-data_description.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-data_description",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-data_description",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-structural-variants-data_description.md",tags:[],version:"current",frontMatter:{}},p=[{value:"Bed Example",id:"bed-example",children:[],level:4},{value:"TSV Example",id:"tsv-example",children:[],level:4},{value:"Structural Variant Type Mapping",id:"structural-variant-type-mapping",children:[],level:4}],m={toc:p},s="wrapper";function u(t){let{components:e,...n}=t;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h4",{id:"bed-example"},"Bed Example"),(0,l.kt)("p",null,"The bed file was obtained from original source for 
GRCh37"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#chrom start end name svtype ALGORITHMS BOTHSIDES_SUPPORT CHR2 CPX_INTERVALS CPX_TYPE END2 ENDEVIDENCE HIGH_SR_BACKGROUND PCRPLUS_DEPLETED PESR_GT_OVERDISPERSION POS2 PROTEIN_CODING__COPY_GAIN PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC PROTEIN_CODING__INTRONIC PROTEIN_CODING__INV_SPAN PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER PROTEIN_CODING__UTR SOURCE STRANDS SVLEN SVTYPE UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN AC AF N_BI_GENOS N_HOMREF N_HET N_HOMALT FREQ_HOMREF FREQ_HET FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF MALE_N_HET MALE_N_HOMALT MALE_FREQ_HOMREF MALE_FREQ_HET MALE_FREQ_HOMALT MALE_N_HEMIREF MALE_N_HEMIALT MALE_FREQ_HEMIREF MALE_FREQ_HEMIALT PAR FEMALE_AN FEMALE_AC FEMALE_AF FEMALE_N_BI_GENOS FEMALE_N_HOMREF FEMALE_N_HET FEMALE_N_HOMALT FEMALE_FREQ_HOMREF FEMALE_FREQ_HET FEMALE_FREQ_HOMALT POPMAX_AF AFR_AN AFR_AC AFR_AF AFR_N_BI_GENOS AFR_N_HOMREF AFR_N_HET AFR_N_HOMALT AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF AFR_MALE_N_HET AFR_MALE_N_HOMALT AFR_MALE_FREQ_HOMREF AFR_MALE_FREQ_HET AFR_MALE_FREQ_HOMALT AFR_MALE_N_HEMIREF AFR_MALE_N_HEMIALT AFR_MALE_FREQ_HEMIREF AFR_MALE_FREQ_HEMIALT AFR_FEMALE_AN AFR_FEMALE_AC AFR_FEMALE_AF AFR_FEMALE_N_BI_GENOS AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT AMR_AN AMR_AC AMR_AF AMR_N_BI_GENOS AMR_N_HOMREF AMR_N_HET AMR_N_HOMALT AMR_FREQ_HOMREF AMR_FREQ_HET AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS AMR_MALE_N_HOMREF AMR_MALE_N_HET AMR_MALE_N_HOMALT AMR_MALE_FREQ_HOMREF AMR_MALE_FREQ_HET AMR_MALE_FREQ_HOMALT AMR_MALE_N_HEMIREF AMR_MALE_N_HEMIALT AMR_MALE_FREQ_HEMIREF AMR_MALE_FREQ_HEMIALT AMR_FEMALE_AN AMR_FEMALE_AC 
AMR_FEMALE_AF AMR_FEMALE_N_BI_GENOS AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT EAS_AN EAS_AC EAS_AF EAS_N_BI_GENOS EAS_N_HOMREF EAS_N_HET EAS_N_HOMALT EAS_FREQ_HOMREF EAS_FREQ_HET EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF EAS_MALE_N_HET EAS_MALE_N_HOMALT EAS_MALE_FREQ_HOMREF EAS_MALE_FREQ_HET EAS_MALE_FREQ_HOMALT EAS_MALE_N_HEMIREF EAS_MALE_N_HEMIALT EAS_MALE_FREQ_HEMIREF EAS_MALE_FREQ_HEMIALT EAS_FEMALE_AN EAS_FEMALE_AC EAS_FEMALE_AF EAS_FEMALE_N_BI_GENOS EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT EUR_AN EUR_AC EUR_AF EUR_N_BI_GENOS EUR_N_HOMREF EUR_N_HET EUR_N_HOMALT EUR_FREQ_HOMREF EUR_FREQ_HET EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF EUR_MALE_N_HET EUR_MALE_N_HOMALT EUR_MALE_FREQ_HOMREF EUR_MALE_FREQ_HET EUR_MALE_FREQ_HOMALT EUR_MALE_N_HEMIREF EUR_MALE_N_HEMIALT EUR_MALE_FREQ_HEMIREF EUR_MALE_FREQ_HEMIALT EUR_FEMALE_AN EUR_FEMALE_AC EUR_FEMALE_AF EUR_FEMALE_N_BI_GENOS EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT OTH_AN OTH_AC OTH_AF OTH_N_BI_GENOS OTH_N_HOMREF OTH_N_HET OTH_N_HOMALT OTH_FREQ_HOMREF OTH_FREQ_HET OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF OTH_MALE_N_HET OTH_MALE_N_HOMALT OTH_MALE_FREQ_HOMREF OTH_MALE_FREQ_HET OTH_MALE_FREQ_HOMALT OTH_MALE_N_HEMIREF OTH_MALE_N_HEMIALT OTH_MALE_FREQ_HEMIREF OTH_MALE_FREQ_HEMIALT OTH_FEMALE_AN OTH_FEMALE_AC OTH_FEMALE_AF OTH_FEMALE_N_BI_GENOS OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET OTH_FEMALE_N_HOMALT OTH_FEMALE_FREQ_HOMREF OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT FILTER\n1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False 
False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 0.017857100814580917 0.0UNRESOLVED \n")),(0,l.kt)("h4",{id:"tsv-example"},"TSV Example"),(0,l.kt)("p",null,"The tsv was obtained from lifted over dataset created by dbVar for 
GRCh38"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#variant_call_accession variant_call_id variant_call_type experiment_id sample_id sampleset_id assembly chrcontig outer_start start inner_start inner_stop stop outer_stop insertion_length variant_region_acc variant_region_id copy_number description validation zygosity origin phenotype hgvs_name placement_method placement_rank placements_per_assembly remap_alignment remap_best_within_cluster remap_coverage remap_diff_chr remap_failure_code allele_count allele_frequency allele_number\nnssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\n")),(0,l.kt)("h4",{id:"structural-variant-type-mapping"},"Structural Variant Type Mapping"),(0,l.kt)("p",null,"The source files represented the structural variants with keys using various naming conventions.\nIn the Illumina Connected Annotations JSON output, these keys will be mapped according to the following. 
"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Illumina Connected Annotations JSON SV Type Key"),(0,l.kt)("th",{parentName:"tr",align:null},"GRCh37 Source SV Type Key"),(0,l.kt)("th",{parentName:"tr",align:null},"GRCh38 Source SV Type Key"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"copy_number_variation"),(0,l.kt)("td",{parentName:"tr",align:null}),(0,l.kt)("td",{parentName:"tr",align:null},"copy number variation")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"deletion"),(0,l.kt)("td",{parentName:"tr",align:null},"DEL, CN=0"),(0,l.kt)("td",{parentName:"tr",align:null},"deletion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"duplication"),(0,l.kt)("td",{parentName:"tr",align:null},"DUP"),(0,l.kt)("td",{parentName:"tr",align:null},"duplication")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS"),(0,l.kt)("td",{parentName:"tr",align:null},"insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"inversion"),(0,l.kt)("td",{parentName:"tr",align:null},"INV"),(0,l.kt)("td",{parentName:"tr",align:null},"inversion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME"),(0,l.kt)("td",{parentName:"tr",align:null},"mobile element insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:ALU"),(0,l.kt)("td",{parentName:"tr",align:null},"alu 
insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:LINE1"),(0,l.kt)("td",{parentName:"tr",align:null},"line1 insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,l.kt)("td",{parentName:"tr",align:null},"INS:ME:SVA"),(0,l.kt)("td",{parentName:"tr",align:null},"sva insertion")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"structural alteration"),(0,l.kt)("td",{parentName:"tr",align:null}),(0,l.kt)("td",{parentName:"tr",align:null},"sequence alteration")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"complex_structural_alteration"),(0,l.kt)("td",{parentName:"tr",align:null},"CPX"),(0,l.kt)("td",{parentName:"tr",align:null})))))}u.isMDXComponent=!0},818:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>p});var a=n(7462),l=(n(7294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-json",id:"data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],m={toc:p},s="wrapper";function u(t){let{components:e,...n}=t;return(0,l.kt)(s,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": [\n {\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n 
"variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n "afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n }\n]\n\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"chromosome number")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"begin"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"end"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"variantType"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"structural variant 
type")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"variantId"),(0,l.kt)("td",{parentName:"tr",align:null},"string"),(0,l.kt)("td",{parentName:"tr",align:null},"gnomAD ID")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other 
populations.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"integer"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"boolean"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,l.kt)("td",{parentName:"tr",align:null},"floating point"),(0,l.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. 
Range: 0 - 1.0")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,l.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}u.isMDXComponent=!0},1106:(t,e,n)=>{n.r(e),n.d(e,{contentTitle:()=>s,default:()=>c,frontMatter:()=>m,metadata:()=>u,toc:()=>d});var a=n(7462),l=(n(7294),n(3905)),r=n(3827),i=n(4859),o=n(818),p=n(6335);const m={title:"gnomAD"},s=void 
0,u={unversionedId:"data-sources/gnomad",id:"data-sources/gnomad",title:"gnomAD",description:"Overview",source:"@site/docs/data-sources/gnomad.mdx",sourceDirName:"data-sources",slug:"/data-sources/gnomad",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad.mdx",tags:[],version:"current",frontMatter:{title:"gnomAD"},sidebar:"docs",previous:{title:"GME Variome",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme"},next:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-heteroplasmy"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF extraction",id:"vcf-extraction",children:[],level:3},{value:"Computation",id:"computation",children:[],level:3},{value:"Merging genomes and exomes",id:"merging-genomes-and-exomes",children:[],level:3},{value:"Filters",id:"filters",children:[],level:3},{value:"VCF download instructions",id:"vcf-download-instructions",children:[],level:3},{value:"JSON output",id:"json-output",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[{value:"Source data files",id:"source-data-files",children:[],level:4}],level:3}],level:2},{value:"LoF Gene Metrics",id:"lof-gene-metrics",children:[{value:"Tab delimited file example",id:"tab-delimited-file-example",children:[],level:3},{value:"JSON key to TSV column mapping",id:"json-key-to-tsv-column-mapping",children:[],level:3},{value:"Gene symbol update",id:"gene-symbol-update",children:[],level:3},{value:"Conflict resolution",id:"conflict-resolution",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON output",id:"json-output-1",children:[],level:3}],level:2},{value:"Structural 
Variants",id:"structural-variants",children:[{value:"Source Files",id:"source-files",children:[],level:3},{value:"Download URLs",id:"download-urls",children:[{value:"GRCh37",id:"grch37",children:[],level:4},{value:"GRCh38",id:"grch38",children:[],level:4},{value:"Download URL",id:"download-url-1",children:[],level:4}],level:3},{value:"JSON output",id:"json-output-2",children:[],level:3}],level:2}],N={toc:d},g="wrapper";function c(t){let{components:e,...n}=t;return(0,l.kt)(g,(0,a.Z)({},N,n,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"The Genome Aggregation Database (",(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/"},"gnomAD"),") is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Koch, L., 2020. Exploring human genomic diversity with gnomAD. 
",(0,l.kt)("em",{parentName:"p"},"Nature Reviews Genetics"),", ",(0,l.kt)("strong",{parentName:"p"},"21(8)"),", pp.448-448."))),(0,l.kt)("h2",{id:"small-variants"},"Small Variants"),(0,l.kt)("h3",{id:"vcf-extraction"},"VCF extraction"),(0,l.kt)("p",null,"We currently extract the following info fields from gnomAD genome and exome VCF files:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("p",null,"We also extract the following extra fields from gnomAD exome VCF file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("h3",{id:"computation"},"Computation"),(0,l.kt)("p",null,"Using these, we compute the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Coverage"),(0,l.kt)("li",{parentName:"ul"},"Allele count, Homozygous count, allele number and allele frequencies for:"),(0,l.kt)("li",{parentName:"ul"},"Global population"),(0,l.kt)("li",{parentName:"ul"},"African/African Americans"),(0,l.kt)("li",{parentName:"ul"},"Admixed Americans"),(0,l.kt)("li",{parentName:"ul"},"Ashkenazi Jews"),(0,l.kt)("li",{parentName:"ul"},"East Asians"),(0,l.kt)("li",{parentName:"ul"},"Finnish"),(0,l.kt)("li",{parentName:"ul"},"Non-Finnish Europeans"),(0,l.kt)("li",{parentName:"ul"},"South Asian"),(0,l.kt)("li",{parentName:"ul"},"Others (population not assigned)"),(0,l.kt)("li",{parentName:"ul"},"Male"),(0,l.kt)("li",{parentName:"ul"},"Female"),(0,l.kt)("li",{parentName:"ul"},"Controls")),(0,l.kt)("div",{className:"admonition admonition-tip alert 
alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Coverage = DP / AN. Frequencies are computed using AC/AN for each population."),(0,l.kt)("li",{parentName:"ul"},"Please note that currently there is no genome sequencing data of south asian (SAS) population available in gnomAD."),(0,l.kt)("li",{parentName:"ul"},"Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.")))),(0,l.kt)("h3",{id:"merging-genomes-and-exomes"},"Merging genomes and exomes"),(0,l.kt)("p",null,"When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 
5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"For GRCh37, Illumina Connected Annotations currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output."),(0,l.kt)("li",{parentName:"ul"},"For GRCh38, Illumina Connected Annotations currently uses gnomAD version 3.0 which doesn't contain the exomes data. Therefore, only genomes data are presented in the output.")))),(0,l.kt)("h3",{id:"filters"},"Filters"),(0,l.kt)("p",null,"The following strategy will be used when there's a conflict in filter status:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"center"}),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes PASS")),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes Filtered")))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes PASS")),(0,l.kt)("td",{parentName:"tr",align:"center"},"PASS"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use exome data")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes Filtered")),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use genome data"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Filtered")))),(0,l.kt)("h3",{id:"vcf-download-instructions"},"VCF download 
instructions"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/downloads"},"https://gnomad.broadinstitute.org/downloads")),(0,l.kt)("h3",{id:"json-output"},"JSON output"),(0,l.kt)(r.default,{mdxType:"JSONV"}),(0,l.kt)("h3",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,l.kt)("p",null,"The gnomAD ",(0,l.kt)("inlineCode",{parentName:"p"},".nsa")," for Illumina Connected Annotations can be built using the ",(0,l.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,l.kt)("inlineCode",{parentName:"p"},"gnomad")," subcommand. We will describe building gnomAD version 3.1 here."),(0,l.kt)("h4",{id:"source-data-files"},"Source data files"),(0,l.kt)("p",null,"Input VCF files (one per chromosome) and a ",(0,l.kt)("inlineCode",{parentName:"p"},".version")," file are required in a folder to build the ",(0,l.kt)("inlineCode",{parentName:"p"},".nsa")," file. For example, my directory contains:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr10.vcf.bgz chr22.vcf.bgz\nchr11.vcf.bgz chr2.vcf.bgz\nchr12.vcf.bgz chr3.vcf.bgz\nchr13.vcf.bgz chr4.vcf.bgz\nchr14.vcf.bgz chr5.vcf.bgz\nchr15.vcf.bgz chr6.vcf.bgz\nchr16.vcf.bgz chr7.vcf.bgz\nchr17.vcf.bgz chr8.vcf.bgz\nchr18.vcf.bgz chr9.vcf.bgz\nchr19.vcf.bgz chrM.vcf.bgz\nchr1.vcf.bgz chrX.vcf.bgz\nchr20.vcf.bgz chrY.vcf.bgz\nchr21.vcf.bgz gnomad.r3.1.version\n")),(0,l.kt)("p",null,"The version file is a text file with the following content."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=gnomAD\nVERSION=3.1\nDATE=2020-10-29\nDESCRIPTION=Allele frequencies from Genome Aggregation Database (gnomAD)\n")),(0,l.kt)("p",null,"The help menu for the utility is as follows:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"SAUtils.dll gnomad\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, 
Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.17.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll gnomad [options]\nReads provided supplementary data files and populates tsv files\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --genome, -g input directory containing VCF (and .version)\n files with genomic frequencies\n --exome, -e input directory containing VCF (and .version)\n files with exomic frequencies\n --temp, -t output temp directory for intermediate (per chrom)\n NSA files\n --out, -o output directory for NSA file\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,l.kt)("p",null,"Here is a sample execution:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll Gnomad \\\\\n--ref ~/References/7/Homo_sapiens.GRCh38.Nirvana.dat --genome genomes/ \\\\\n--out ~/SupplementaryDatabase/63/GRCh38 --temp ~/ExternalDataSources/gnomAD/3.1/GRCh38/temp\n")),(0,l.kt)("h2",{id:"lof-gene-metrics"},"LoF Gene Metrics"),(0,l.kt)("h3",{id:"tab-delimited-file-example"},"Tab delimited file example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_zmis_z lof_z oe_lof_upper_rank oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfep_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof 
exac_exp_lof exac_oe_lof brain_expression chromosome start_positionend_position\nMED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643\n")),(0,l.kt)("h3",{id:"json-key-to-tsv-column-mapping"},"JSON key to TSV column mapping"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"JSON key"),(0,l.kt)("th",{parentName:"tr",align:null},"TSV column"),(0,l.kt)("th",{parentName:"tr",align:null},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"pLI"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like 
recessive genes, observed ~ 0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"syn_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"mis_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"oe_lof_upper"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))),(0,l.kt)("h3",{id:"gene-symbol-update"},"Gene symbol update"),(0,l.kt)("p",null,"The input file provides Ensembl gene ids for each entry. We observed that they were unique while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene Ids are more stable, and Illumina Connected Annotations transcript cache data contains Ensembl gene ids, we use these ids to extract the gene symbols from the transcript cache. For example, if ENSG0001 has gene symbol GENE1 in the input but Illumina Connected Annotations cache say ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry."),(0,l.kt)("h3",{id:"conflict-resolution"},"Conflict resolution"),(0,l.kt)("p",null,"gnomAD uses Ensembl GeneID as unique identifiers in the ",(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"source file")," but Illumina Connected Annotations uses HGNC gene symbols. 
Multiple Ensembl GeneIDs can map to the same HGNC symbol and therefore may result is conflict."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"MDGA2 ENST00000426342 306 4.0043e+02 7.6419e-01 2.1096e-05 4724 78 1.6525e+02 4.7202e-01 1923 125 1.3737e+02 9.0993e-01 7.1973e-06 1413 4 2.0926e-06 453 3.8316e+01 9.9922e-01 8.6490e-12 7.8128e-04 1.0440e-01 7.8600e-01 1.0560e+00 6.9500e-01 8.4000e-01 5.0000e-02 2.3900e-01 8.2988e-01 1.6769e+00 5.1372e+00 1529 0 0 7 2.8103e-05 4.0317e-06 124784 7 0 124791 2.8047e-05 9.8167e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5391e-05 1.6672e-04 3.2680e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5308e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000139915 2 2181 13 protein_coding 835332 9.9322e-01 3 2.7833e+01 1.0779e-01 NA 14 47308826 48144157\nMDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 47311134 48143999\n")),(0,l.kt)("p",null,'In such cases, Illumina Connected Annotations chooses the entry with the smallest "LOEUF" value. 
The reason for choosing this value can be highlighted by the following table:'),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"right"},"LOEUF decile"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Haplo-insufficient"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Dominant"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Recessive"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Olfactory Genes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"0-10%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"104"),(0,l.kt)("td",{parentName:"tr",align:"right"},"140"),(0,l.kt)("td",{parentName:"tr",align:"right"},"36"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"10-20%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"47"),(0,l.kt)("td",{parentName:"tr",align:"right"},"128"),(0,l.kt)("td",{parentName:"tr",align:"right"},"72"),(0,l.kt)("td",{parentName:"tr",align:"right"},"1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"20-30%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"17"),(0,l.kt)("td",{parentName:"tr",align:"right"},"86"),(0,l.kt)("td",{parentName:"tr",align:"right"},"112"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"30-40%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"8"),(0,l.kt)("td",{parentName:"tr",align:"right"},"80"),(0,l.kt)("td",{parentName:"tr",align:"right"},"173"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"40-50%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"7"),(0,l.kt)("td",{parentName:"tr",align:"right"},"65"),(0,l.kt)("td",{parentName:"tr",align:"right"},"206"),(0,l.kt
)("td",{parentName:"tr",align:"right"},"8")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"50-60%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4"),(0,l.kt)("td",{parentName:"tr",align:"right"},"54"),(0,l.kt)("td",{parentName:"tr",align:"right"},"207"),(0,l.kt)("td",{parentName:"tr",align:"right"},"6")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"60-70%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"46"),(0,l.kt)("td",{parentName:"tr",align:"right"},"154"),(0,l.kt)("td",{parentName:"tr",align:"right"},"18")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"70-80%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"2"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49"),(0,l.kt)("td",{parentName:"tr",align:"right"},"120"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"80-90%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"34"),(0,l.kt)("td",{parentName:"tr",align:"right"},"58"),(0,l.kt)("td",{parentName:"tr",align:"right"},"96")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"90-100%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"26"),(0,l.kt)("td",{parentName:"tr",align:"right"},"40"),(0,l.kt)("td",{parentName:"tr",align:"right"},"174")))),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 
5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Table source: ",(0,l.kt)("a",{parentName:"li",href:"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf"},"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf")),(0,l.kt)("li",{parentName:"ul"},"This table indicates that lower LOEUF scores have more deleterious effect on genes."),(0,l.kt)("li",{parentName:"ul"},"Only 15 out of 19685 genes have conflicting entries.")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"List of genes with conflicting entries")),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'MDGA2:\n {"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}\n {"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}\nCRYBG3:\n {"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}\n {"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}\nCHTF8:\n {"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}\n {"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}\nSEPT1:\n {"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}\n {"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}\nARL14EPL:\n {"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}\n {"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}\nUGT2A1:\n {"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}\n 
{"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}\nLTB4R2:\n {"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}\n {"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}\nCDRT1:\n {"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}\n {"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}\nMUC3A:\n {"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}\n {"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}\nCOG8:\n {"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}\n {"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}\nAC006486.1:\n {"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}\n {"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}\nAL645922.1:\n {"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}\n {"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}\nNBPF20:\n {"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}\n {"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}\nPRAMEF11:\n {"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}\n {"synZ":-3.33e0,"misZ":-2.59e0}\nFAM231D:\n {"synZ":-1.98e0,"misZ":-1.44e0}\n {"synZ":1.07e0,"misZ":3.13e-1}\n')),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Conflict resolution")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Pick the entry with the lowest LOEUF score"),(0,l.kt)("li",{parentName:"ul"},"If the same, pick the lowest pLI"),(0,l.kt)("li",{parentName:"ul"},"Otherwise pick 
the entry with the max absolute value of synZ + misZ")),(0,l.kt)("h3",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz")),(0,l.kt)("h3",{id:"json-output-1"},"JSON output"),(0,l.kt)(i.default,{mdxType:"JSONG"}),(0,l.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Collins, R.L., Brand, H., Karczewski, K.J. et al. 2020. A structural variation reference for medical and population genetics. ",(0,l.kt)("em",{parentName:"p"},"Nature")," ",(0,l.kt)("strong",{parentName:"p"},"581"),", pp.444\u2013451. 
",(0,l.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/s41586-020-2287-8"},"https://doi.org/10.1038/s41586-020-2287-8")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Note"),"\nThe gnomAD structural variant annotations are in a preview stage at the moment.\nCurrently, the annotations do not include translocation breakends.\nFuture updates will include a better way of annotating the structural variants."),(0,l.kt)("h3",{id:"source-files"},"Source Files"),(0,l.kt)(p.default,{mdxType:"SVDATADESCRIPTION"}),(0,l.kt)("h3",{id:"download-urls"},"Download URLs"),(0,l.kt)("h4",{id:"grch37"},"GRCh37"),(0,l.kt)("p",null,"The GRCh37 file was downloaded from the original source. Following table gives some essential data metrics:"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.bed.gz"},"https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.bed.gz")),(0,l.kt)("h4",{id:"grch38"},"GRCh38"),(0,l.kt)("p",null,"Note: The data was unavailable from gnomAD 2.1 original source, however the lifted over structural variant dataset was created by dbVar and was obtained from them ",(0,l.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/sites/dbvarapp/studies/nstd166/"},"https://www.ncbi.nlm.nih.gov/sites/dbvarapp/studies/nstd166/"),"."),(0,l.kt)("h4",{id:"download-url-1"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/tsv/nstd166.GRCh38.variant_call.tsv.gz"},"https://ftp.ncbi.nlm.nih.gov/pub/dbVar/data/Homo_sapiens/by_study/tsv/nstd166.GRCh38.variant_call.tsv.gz")),(0,l.kt)("h3",{id:"json-output-2"},"JSON output"),(0,l.kt)(o.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/996b3ed9.d6b8ddba.js b/assets/js/996b3ed9.d6b8ddba.js deleted file mode 100644 index 6cb37857..00000000 --- 
a/assets/js/996b3ed9.d6b8ddba.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4006,3460],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},p=function(e){var t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=m(n),h=i,u=d["".concat(s,".").concat(h)]||d[h]||c[h]||o;return n?a.createElement(u,r(r({ref:t},p),{},{components:n})):a.createElement(u,r({ref:t},p))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var o=n.length,r=new Array(o);r[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:i,r[1]=l;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>d,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const o={},r=void 0,l={unversionedId:"data-sources/omim-json",id:"version-3.17/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.17/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/omim-json.md",tags:[],version:"3.17",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],m={toc:s},p="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,i.kt)("h4",{id:"phenotype"},"Phenotype"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,i.kt)("h4",{id:"mapping"},"Mapping"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,i.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,i.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,i.kt)("h4",{id:"inheritance"},"Inheritance"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,i.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,i.kt)("h4",{id:"comments"},"Comments"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,i.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,i.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}d.isMDXComponent=!0},52380:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>c,frontMatter:()=>r,metadata:()=>s,toc:()=>m});var a=n(87462),i=(n(67294),n(3905)),o=n(55074);const r={title:"OMIM"},l=void 
0,s={unversionedId:"data-sources/omim",id:"version-3.17/data-sources/omim",title:"OMIM",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/omim.mdx",sourceDirName:"data-sources",slug:"/data-sources/omim",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/omim",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/omim.mdx",tags:[],version:"3.17",frontMatter:{title:"OMIM"},sidebar:"version-3.17/docs",previous:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mitomap"},next:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/phylop"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Parse OMIM data",id:"parse-omim-data",children:[{value:"mim2gene.txt",id:"mim2genetxt",children:[],level:3},{value:"OMIM API",id:"omim-api",children:[{value:"Mapping key to content",id:"mapping-key-to-content",children:[],level:4},{value:"Phenotype character to comment",id:"phenotype-character-to-comment",children:[],level:4}],level:3},{value:"Remove links in OMIM descriptions",id:"remove-links-in-omim-descriptions",children:[],level:3}],level:2},{value:"JSON output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[],level:2}],p={toc:m},d="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily."),(0,i.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publications")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/30445645/"},"30445645"),"."),(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM\xae), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/25428349/"},"25428349"),"."))),(0,i.kt)("h2",{id:"parse-omim-data"},"Parse OMIM data"),(0,i.kt)("p",null,"Nirvana uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Nirvana. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to a Nirvana gene symbol are further processed. 
The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols."),(0,i.kt)("h3",{id:"mim2genetxt"},"mim2gene.txt"),(0,i.kt)("p",null,"This mim2gene.txt (",(0,i.kt)("a",{parentName:"p",href:"http://omim.org/static/omim/data/mim2gene.txt"},"http://omim.org/static/omim/data/mim2gene.txt"),") file provides the mapping between MIM numbers and gene symbols. An example of this file is given below:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"# MIM Number MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq) Entrez Gene ID (NCBI) Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)\n100050 predominantly phenotypes\n100070 phenotype 100329167\n100100 phenotype\n100200 predominantly phenotypes\n100300 phenotype\n100500 moved/removed\n100600 phenotype\n100640 gene 216 ALDH1A1 ENSG00000165092\n100650 gene/phenotype 217 ALDH2 ENSG00000111275\n100660 gene 218 ALDH3A1 ENSG00000108602\n100670 gene 219 ALDH1B1 ENSG00000137124\n100675 predominantly phenotypes\n100678 gene 39 ACAT2 ENSG00000120437\n")),(0,i.kt)("p",null,'The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns are used to find the proper gene symbol supported by Nirvana, which may or may not be the same as the gene symbol listed here.'),(0,i.kt)("h3",{id:"omim-api"},"OMIM API"),(0,i.kt)("p",null,"Nirvana retrieves the OMIM annotations from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.omim.org/api"},"OMIM API"),' JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. 
A sample JSON response from the API is provided there.'),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "omim": {\n "version": "1.0",\n "entryList": [\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 100640,\n "status": "live",\n "titles": {\n "preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",\n "alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\\nACETALDEHYDE DEHYDROGENASE 1;;\\nALDH, LIVER CYTOSOLIC;;\\nRETINAL DEHYDROGENASE 1; RALDH1"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. 
ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 7709,\n "chromosome": 9,\n "chromosomeSymbol": "9",\n "chromosomeSort": 225,\n "chromosomeLocationStart": 72900670,\n "chromosomeLocationEnd": 72953052,\n "transcript": "ENST00000297785.7",\n "cytoLocation": "9q21",\n "computedCytoLocation": "9q21.13",\n "mimNumber": 100640,\n "geneSymbols": "ALDH1A1",\n "geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",\n "mappingMethod": "REa, A",\n "confidence": "P",\n "mouseGeneSymbol": "Aldh1a1",\n "mouseMgiID": "MGI:1353450",\n "geneInheritance": null\n },\n "externalLinks": {\n "geneIDs": "216",\n "hgncID": "402",\n "ensemblIDs": "ENSG00000165092,ENST00000297785.8",\n "approvedGeneSymbols": "ALDH1A1",\n "ncbiReferenceSequences": "1519246465",\n "proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",\n "uniGenes": "Hs.76392",\n "swissProtIDs": "P00352",\n "decipherGene": false,\n "umlsIDs": "C1412333",\n "gtr": true,\n "cmgGene": false,\n "keggPathways": true,\n "gwasCatalog": false,\n\n }\n }\n },\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 102560,\n "status": "live",\n "titles": {\n "preferredTitle": "ACTIN, GAMMA-1; ACTG1",\n "alternativeTitles": "ACTIN, GAMMA; ACTG;;\\nCYTOSKELETAL GAMMA-ACTIN;;\\nACTIN, CYTOPLASMIC, 2"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. 
Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 13666,\n "chromosome": 17,\n "chromosomeSymbol": "17",\n "chromosomeSort": 947,\n "chromosomeLocationStart": 81509970,\n "chromosomeLocationEnd": 81512798,\n "transcript": "ENST00000331925.7",\n "cytoLocation": "17q25.3",\n "computedCytoLocation": "17q25.3",\n "mimNumber": 102560,\n "geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",\n "geneName": "Actin, gamma-1",\n "mappingMethod": "REa, A, Fd",\n "confidence": "C",\n "mouseGeneSymbol": "Actg1",\n "mouseMgiID": "MGI:87906",\n "geneInheritance": null,\n "phenotypeMapList": [\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Baraitser-Winter syndrome 2",\n "phenotypeMimNumber": 614583,\n "phenotypicSeriesNumber": "PS243310",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n },\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Deafness, autosomal dominant 20/26",\n "phenotypeMimNumber": 604717,\n "phenotypicSeriesNumber": "PS124900",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n }\n ]\n }\n }\n }\n ]\n }\n}\n')),(0,i.kt)("p",null,"Content from the OMIM API JSON response is reorganized as shown in the Nirvana ",(0,i.kt)("a",{parentName:"p",href:"#json-output"},"JSON Output")),(0,i.kt)("p",null,"Mappings between the Nirvana JSON output and OMIM JSON API are listed in the table below:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Nirvana JSON key chain"),(0,i.kt)("th",{parentName:"tr",align:"left"},"OMIM API JSON key 
chain"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:geneName")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (",(0,i.kt)("a",{parentName:"td",href:"#mapping-key-to-content"},"see mapping 
below"),")")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:inheritances"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (",(0,i.kt)("a",{parentName:"td",href:"#phenotype-character-to-comment"},"see mapping below"),")")))),(0,i.kt)("h4",{id:"mapping-key-to-content"},"Mapping key to content"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"1")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"2")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disease phenotype itself was mapped"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"3")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"molecular basis of the disorder is known"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"4")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder is a chromosome deletion or duplication syndrome"),(0,i.kt)("br",null)),(0,i.kt)("h4",{id:"phenotype-character-to-comment"},"Phenotype character to comment"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"?")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"unconfirmed or possibly spurious mapping"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"["),"/",(0,i.kt)("inlineCode",{parentName:"p"},"]")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"nondiseases"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"{"),"/",(0,i.kt)("inlineCode",{parentName:"p"},"}")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"contribute to susceptibility to multifactorial disorders or to susceptibility to 
infection"),(0,i.kt)("br",null)),(0,i.kt)("h3",{id:"remove-links-in-omim-descriptions"},"Remove links in OMIM descriptions"),(0,i.kt)("p",null,"There are different types of link in the OMIM description section. For example, in above JSON response, we have the description of MIM entry 100640:"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).")),(0,i.kt)("p",null,"As the descriptions will be shown as plain text, we remove the curry brackets surrounding links and try to make the text still readable with minimal modifications. Briefly:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},'Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.'),(0,i.kt)("li",{parentName:"ul"},'Links referring to a literature reference will be processed to remove the internal index and curry brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".'),(0,i.kt)("li",{parentName:"ul"},'All the other links will simple have their curry brackets removed. 
For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".'),(0,i.kt)("li",{parentName:"ul"},'If the content within a pair of parentheses becomes empty after being processed, the parentheses need to be removed as well and its surrounding white spaces should be properly processed. For example, "ALDH2 ({100650})," will become "ALDH2,".')),(0,i.kt)("p",null,"Here is a list of examples about how the description section supposed to be processed:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Original text"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Processed text"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"({516030}, {516040}, and {516050})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1, {168461}; D2, {123833}; D3, {123834})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1; D2; D3)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2, {125645})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., see {102700}, {300755})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH). 
See also liver mitochondrial ALDH2")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A; {601011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1; {138359}), mu (e.g., {138350})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1), mu")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB; see {164011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G, {147574})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; {EC 2.7.1.74}; {125450})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; EC 2.7.1.74)")))),(0,i.kt)("h2",{id:"json-output"},"JSON output"),(0,i.kt)(o.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The first step in builing the OMIM ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," files is to use the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"downloadOMIM")," to download the necessary data. In order to download the data the user must possess an API key obtained from OMIM. 
This key has to be set as the environment variable ",(0,i.kt)("em",{parentName:"p"},"OmimApiKey"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"export OmimApiKey=\ndotnet NirvanaBuild/SAUtils.dll downloadOMIM\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll downloadomim [options]\nDownload the OMIM gene annotation data\n\nOPTIONS:\n --uga, -u universal gene archive path\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet NirvanaBuild/SAUtils.dll downloadOMIM --ref References/7/Homo_sapiens.GRCh38.Nirvana.dat --uga Cache/27/UGA.tsv.gz --out ExternalDataSources/OMIM/2021-06-14\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\nUnable to resolve gene symbol conflict for CD300H: Ensembl: [ENSG00000284690]: AC079325.2, Entrez Gene: [100130520]: LOC100130520\nUnable to resolve gene symbol conflict for STRIT1: Ensembl: [ENSG00000240045]: DWORF, Entrez Gene: [100507537]: LOC100507537\nUnable to resolve gene symbol conflict for WAKMAR2: Ensembl: [ENSG00000237499]: AL357060.2, Entrez Gene: [100130476]: LOC100130476\nUnable to resolve gene symbol conflict for PERCC1: Ensembl: [ENSG00000284395]: AL032819.3, Entrez Gene: [105371045]: LOC105371045\nUnable to resolve gene symbol conflict for LASTR: Ensembl: [ENSG00000242147]: AL365356.5, Entrez Gene: [105376382]: LOC105376382\nUnable to resolve gene symbol conflict for PRANCR: Ensembl: [ENSG00000257815]: LINC01481, Entrez Gene: [101928062]: LOC101928062\nUnable to resolve gene symbol 
conflict for THORLNC: Ensembl: [ENSG00000226856]: AC093901.1, Entrez Gene: [100506797]: LOC100506797\nGene Symbol Update Statistics\n============================================\n# of gene symbols already up-to-date: 15,952\n# of gene symbols updated: 330\n# of genes where both IDs are null: 0\n# of gene symbols not in cache: 148\n# of resolved gene symbol conflicts: 15\n# of unresolved gene symbol conflicts: 7\n\nTime: 00:02:38.2\n")),(0,i.kt)("p",null,"Once the download has succeeded, the ",(0,i.kt)("inlineCode",{parentName:"p"},"nga")," files can be produced using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"omim"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet NirvanaBuild/SAUtils.dll omim\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll omim [options]\nCreates a gene annotation database from OMIM data\n\nOPTIONS:\n --m2g, -m MimToGeneSymbol tsv file\n --json, -j OMIM entry json file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\n\ndotnet NirvanaBuild/SAUtils.dll omim --m2g ExternalDataSources/OMIM/2021-06-14/MimToGeneSymbol.tsv --json ExternalDataSources/OMIM/2021-06-14/MimEntries.json.gz --out SupplementaryDatabase/63/\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\n\nTime: 00:00:04.5\n")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9a7a0272.64388729.js b/assets/js/9a7a0272.64388729.js deleted file mode 100644 index 39429b1a..00000000 --- 
a/assets/js/9a7a0272.64388729.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6819],{3905:(e,t,r)=>{r.d(t,{Zo:()=>p,kt:()=>f});var n=r(67294);function a(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function o(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function c(e){for(var t=1;t=0||(a[r]=e[r]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(a[r]=e[r])}return a}var i=n.createContext({}),s=function(e){var t=n.useContext(i),r=t;return e&&(r="function"==typeof e?e(t):c(c({},t),e)),r},p=function(e){var t=s(e.components);return n.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var r=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=s(r),d=a,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||o;return r?n.createElement(f,c(c({ref:t},p),{},{components:r})):n.createElement(f,c({ref:t},p))}));function f(e,t){var r=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=r.length,c=new Array(o);c[0]=d;var l={};for(var i in t)hasOwnProperty.call(t,i)&&(l[i]=t[i]);l.originalType=e,l[u]="string"==typeof e?e:a,c[1]=l;for(var s=2;s{r.r(t),r.d(t,{contentTitle:()=>c,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>i});var n=r(87462),a=(r(67294),r(3905));const o={},c=void 0,l={unversionedId:"data-sources/revel-json",id:"version-3.21/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/revel-json.md",tags:[],version:"3.21",frontMatter:{}},i=[],s={toc:i},p="wrapper";function u(e){let{components:t,...r}=e;return(0,a.kt)(p,(0,n.Z)({},s,r,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"score"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9a946f68.251ba48f.js b/assets/js/9a946f68.251ba48f.js new file mode 100644 index 00000000..cd90c411 --- /dev/null +++ b/assets/js/9a946f68.251ba48f.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5198],{3905:(a,e,t)=>{t.d(e,{Zo:()=>d,kt:()=>A});var n=t(7294);function c(a,e,t){return e in a?Object.defineProperty(a,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):a[e]=t,a}function i(a,e){var t=Object.keys(a);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(a);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(a,e).enumerable}))),t.push.apply(t,n)}return t}function r(a){for(var e=1;e=0||(c[t]=a[t]);return 
c}(a,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(a);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(a,t)&&(c[t]=a[t])}return c}var s=n.createContext({}),l=function(a){var e=n.useContext(s),t=e;return a&&(t="function"==typeof a?a(e):r(r({},e),a)),t},d=function(a){var e=l(a.components);return n.createElement(s.Provider,{value:e},a.children)},u="mdxType",m={inlineCode:"code",wrapper:function(a){var e=a.children;return n.createElement(n.Fragment,{},e)}},p=n.forwardRef((function(a,e){var t=a.components,c=a.mdxType,i=a.originalType,s=a.parentName,d=o(a,["components","mdxType","originalType","parentName"]),u=l(t),p=c,A=u["".concat(s,".").concat(p)]||u[p]||m[p]||i;return t?n.createElement(A,r(r({ref:e},d),{},{components:t})):n.createElement(A,r({ref:e},d))}));function A(a,e){var t=arguments,c=e&&e.mdxType;if("string"==typeof a||c){var i=t.length,r=new Array(i);r[0]=p;var o={};for(var s in e)hasOwnProperty.call(e,s)&&(o[s]=e[s]);o.originalType=a,o[u]="string"==typeof a?a:c,r[1]=o;for(var l=2;l{t.r(e),t.d(e,{contentTitle:()=>r,default:()=>u,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var n=t(7462),c=(t(7294),t(3905));const i={title:"Cancer Hotspots"},r=void 0,o={unversionedId:"data-sources/cancer-hotspots",id:"data-sources/cancer-hotspots",title:"Cancer Hotspots",description:"Overview",source:"@site/docs/data-sources/cancer-hotspots.mdx",sourceDirName:"data-sources",slug:"/data-sources/cancer-hotspots",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cancer-hotspots",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cancer-hotspots.mdx",tags:[],version:"current",frontMatter:{title:"Cancer Hotspots"},sidebar:"docs",previous:{title:"Amino Acid 
Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation"},next:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Data extraction",id:"data-extraction",children:[{value:"Example",id:"example",children:[{value:"SNV",id:"snv",children:[],level:4},{value:"Indel",id:"indel",children:[],level:4}],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],l={toc:s},d="wrapper";function u(a){let{components:e,...t}=a;return(0,c.kt)(d,(0,n.Z)({},l,t,{components:e,mdxType:"MDXLayout"}),(0,c.kt)("h2",{id:"overview"},"Overview"),(0,c.kt)("p",null,"Cancer Hotspots, a resource for statistically significant mutations in cancer. It provides information about statistically significantly recurrent mutations identified in large scale cancer genomics data."),(0,c.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,c.kt)("div",{parentName:"div",className:"admonition-heading"},(0,c.kt)("h5",{parentName:"div"},(0,c.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,c.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,c.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,c.kt)("div",{parentName:"div",className:"admonition-content"},(0,c.kt)("p",{parentName:"div"},"Chang MT, Bhattarai TS, Schram AM, Bielski CM, Donoghue MTA, Jonsson P, Chakravarty D, Phillips S, Kandoth C, Penson A, Gorelick A, Shamu T, Patel S, Harris C, Gao J, Sumer SO, Kundra R, Razavi P, Li BT, Reales DN, Socci ND, Jayakumaran G, Zehir A, Benayed R, Arcila ME, Chandarlapaty S, Ladanyi M, Schultz N, Baselga J, 
Berger MF, Rosen N, Solit DB, Hyman DM, Taylor BS. Accelerating Discovery of Functional Mutant Alleles in Cancer. Cancer Discov. 2018 Feb;8(2):174-183. doi: 10.1158/2159-8290.CD-17-0321. Epub 2017 Dec 15. PMID: 29247016; PMCID: PMC5809279."),(0,c.kt)("p",{parentName:"div"},"Chang MT, Asthana S, Gao SP, Lee BH, Chapman JS, Kandoth C, Gao J, Socci ND, Solit DB, Olshen AB, Schultz N, Taylor BS. Identifying recurrent mutations in cancer reveals widespread lineage diversity and mutational specificity. Nat Biotechnol. 2016 Feb;34(2):155-63. doi: 10.1038/nbt.3391. Epub 2015 Nov 30. PMID: 26619011; PMCID: PMC4744099."))),(0,c.kt)("h2",{id:"data-extraction"},"Data extraction"),(0,c.kt)("p",null,"Illumina Connected Annotations currently parses SNV and indel tabs from hotspots_v2.xls file to extract the relevant content."),(0,c.kt)("h3",{id:"example"},"Example"),(0,c.kt)("h4",{id:"snv"},"SNV"),(0,c.kt)("pre",null,(0,c.kt)("code",{parentName:"pre",className:"language-scss"},'Hugo_Symbol Amino_Acid_Position log10_pvalue Mutation_Count Reference_Amino_Acid Total_Mutations_in_Gene Median_Allele_Freq_Rank Allele_Freq_Rank Variant_Amino_Acid Codon_Change Genomic_Position Detailed_Cancer_Types Organ_Types Tri-nucleotides Mutability mu_protein Total_Samples Analysis_Type qvalue tm qvalue_pancanIs_repeat seq length align100 pad12entropy pad24entropy pad36entropy TP reason n_MSK n_Retro judgement inNBT inOncokb ref qvaluect ct Samples\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 
295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.062076
7494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 R:204 
cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:88|thyroid:54|blood:15|bowel:8|testis:5|biliarytract:4|bladder:4|lung:4|ovaryfallopiantube:4|softtissue:3|unk:3|uterus:3|cnsbrain:2|esophagusstomach:2|headandneck:2|bone:1|pancreas:1|thymus:1\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 
295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.062076
7494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 K:142 
cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:62|bowel:18|thyroid:17|blood:12|softtissue:6|lung:5|unk:5|bladder:3|cnsbrain:2|thymus:2|adrenalgland:1|biliarytract:1|esophagusstomach:1|headandneck:1|kidney:1|liver:1|ovaryfallopiantube:1|pancreas:1|testis:1|uterus:1\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 
295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.062076
7494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 L:46 
cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:24|bowel:7|lung:6|blood:2|cnsbrain:2|unk:2|bladder:1|softtissue:1|uterus:1\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 
295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.062076
7494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 H:27 
cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:12|blood:7|bowel:2|lung:2|testis:2|softtissue:1|unk:1\n')),(0,c.kt)("h4",{id:"indel"},"Indel"),(0,c.kt)("pre",null,(0,c.kt)("code",{parentName:"pre",className:"language-scss"},"Hugo_Symbol Amino_Acid_Position log10_pvalue Mutation_Count Reference_Amino_Acid Total_Mutations_in_Gene Median_Allele_Freq_Rank Allele_Freq_Rank SNP_ID Variant_Amino_Acid Codon_Change Genomic_Position Detailed_Cancer_Types Organ_Types Tri-nucleotides Mutability mu_protein ccf Total_Samples indel_size qvalue tm Is_repeat seq length align100 pad12entropy pad24entropy pad36entropy TP reason n_MSK n_Retro judgement inNBT inOncokb Samples\nSMARCA4 546 -7.75235638169585 5 QK:5 101 
NA NA :NA K546del:5 cAGAag/cag:5 19:11106926_5 lgg:536:4|dlbcl:246:1 cnsbrain:2283:4|lymph:366:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 1 0.000230672905611517 SMARCA4 546 FALSE NA NA 1 0.91489630957268 1.2950060272429 1.33965330506364 FALSE LOCAL_ENTROPY 1 4 RETAIN FALSE FALSE cnsbrain:4|lymph:1\nCDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA V28_E33del:4 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 1 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE cervix:1|esophagusstomach:1|lung:1|pancreas:1\nCDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA L32_L37del:3 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 1 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE skin:2|esophagusstomach:1\nCDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA 
A36_N39delinsD:1 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE lung:1\n")),(0,c.kt)("h3",{id:"parsing"},"Parsing"),(0,c.kt)("p",null,"From the file, we're mainly interested in the following columns:"),(0,c.kt)("ul",null,(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"Hugo_Symbol")),(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"Amino_Acid_Position")),(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"Mutation_Count")),(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"Reference_Amino_Acid")),(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"Variant_Amino_Acid")),(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"qvalue"))),(0,c.kt)("p",null,"We map the gene symbol onto the canonical transcripts (RefSeq & Ensembl) for that gene. For SNVs, we obtain position, ref and alt amino acid from source file and generate substitution notation. 
For indels, we get protein change notation from ",(0,c.kt)("inlineCode",{parentName:"p"},"Reference_Amino_Acid")," column.\nThen we match each entry using these notations."),(0,c.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,c.kt)("div",{parentName:"div",className:"admonition-heading"},(0,c.kt)("h5",{parentName:"div"},(0,c.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,c.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,c.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,c.kt)("div",{parentName:"div",className:"admonition-content"},(0,c.kt)("p",{parentName:"div"},"We currently skip all variants labeled as splice from the source"))),(0,c.kt)("h2",{id:"json-output"},"JSON Output"),(0,c.kt)("p",null,"The data source will be captured under the cancerHotspots key in the transcript section."),(0,c.kt)("pre",null,(0,c.kt)("code",{parentName:"pre",className:"language-json",metastring:"{13-18}","{13-18}":!0},'{\n "transcript":"NM_002524.5",\n "source":"RefSeq",\n "bioType":"mRNA",\n "aminoAcids":"Q/K",\n "proteinPos":"61",\n "geneId":"4893",\n "hgnc":"NRAS",\n "hgvsc":"NM_002524.5:c.181C>A",\n "hgvsp":"NP_002515.1:p.(Gln61Lys)",\n "isCanonical":true,\n "proteinId":"NP_002515.1",\n "cancerHotspots":{\n "residue":"Q61",\n "numSamples":422,\n "numAltAminoAcidSamples":142,\n "qValue":0\n 
}\n}\n')),(0,c.kt)("table",null,(0,c.kt)("thead",{parentName:"table"},(0,c.kt)("tr",{parentName:"thead"},(0,c.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,c.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,c.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,c.kt)("tbody",{parentName:"table"},(0,c.kt)("tr",{parentName:"tbody"},(0,c.kt)("td",{parentName:"tr",align:"left"},"residue"),(0,c.kt)("td",{parentName:"tr",align:"center"},"string"),(0,c.kt)("td",{parentName:"tr",align:"left"})),(0,c.kt)("tr",{parentName:"tbody"},(0,c.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,c.kt)("td",{parentName:"tr",align:"center"},"int"),(0,c.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant at the same amino acid position")),(0,c.kt)("tr",{parentName:"tbody"},(0,c.kt)("td",{parentName:"tr",align:"left"},"numAltAminoAcidSamples"),(0,c.kt)("td",{parentName:"tr",align:"center"},"int"),(0,c.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant with the same position and alternate amino acid position")),(0,c.kt)("tr",{parentName:"tbody"},(0,c.kt)("td",{parentName:"tr",align:"left"},"qValue"),(0,c.kt)("td",{parentName:"tr",align:"center"},"double"),(0,c.kt)("td",{parentName:"tr",align:"left"})))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9a946f68.3c05bfe1.js b/assets/js/9a946f68.3c05bfe1.js deleted file mode 100644 index 84769c75..00000000 --- a/assets/js/9a946f68.3c05bfe1.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5198],{3905:(a,e,t)=>{t.d(e,{Zo:()=>d,kt:()=>A});var n=t(67294);function c(a,e,t){return e in a?Object.defineProperty(a,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):a[e]=t,a}function i(a,e){var t=Object.keys(a);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(a);e&&(n=n.filter((function(e){return 
Object.getOwnPropertyDescriptor(a,e).enumerable}))),t.push.apply(t,n)}return t}function r(a){for(var e=1;e=0||(c[t]=a[t]);return c}(a,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(a);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(a,t)&&(c[t]=a[t])}return c}var s=n.createContext({}),l=function(a){var e=n.useContext(s),t=e;return a&&(t="function"==typeof a?a(e):r(r({},e),a)),t},d=function(a){var e=l(a.components);return n.createElement(s.Provider,{value:e},a.children)},u="mdxType",m={inlineCode:"code",wrapper:function(a){var e=a.children;return n.createElement(n.Fragment,{},e)}},p=n.forwardRef((function(a,e){var t=a.components,c=a.mdxType,i=a.originalType,s=a.parentName,d=o(a,["components","mdxType","originalType","parentName"]),u=l(t),p=c,A=u["".concat(s,".").concat(p)]||u[p]||m[p]||i;return t?n.createElement(A,r(r({ref:e},d),{},{components:t})):n.createElement(A,r({ref:e},d))}));function A(a,e){var t=arguments,c=e&&e.mdxType;if("string"==typeof a||c){var i=t.length,r=new Array(i);r[0]=p;var o={};for(var s in e)hasOwnProperty.call(e,s)&&(o[s]=e[s]);o.originalType=a,o[u]="string"==typeof a?a:c,r[1]=o;for(var l=2;l{t.r(e),t.d(e,{contentTitle:()=>r,default:()=>u,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var n=t(87462),c=(t(67294),t(3905));const i={title:"Cancer Hotspots"},r=void 0,o={unversionedId:"data-sources/cancer-hotspots",id:"data-sources/cancer-hotspots",title:"Cancer Hotspots",description:"Overview",source:"@site/docs/data-sources/cancer-hotspots.mdx",sourceDirName:"data-sources",slug:"/data-sources/cancer-hotspots",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cancer-hotspots",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cancer-hotspots.mdx",tags:[],version:"current",frontMatter:{title:"Cancer Hotspots"},sidebar:"docs",previous:{title:"Amino Acid 
Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation"},next:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Data extraction",id:"data-extraction",children:[{value:"Example",id:"example",children:[{value:"SNV",id:"snv",children:[],level:4},{value:"Indel",id:"indel",children:[],level:4}],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],l={toc:s},d="wrapper";function u(a){let{components:e,...t}=a;return(0,c.kt)(d,(0,n.Z)({},l,t,{components:e,mdxType:"MDXLayout"}),(0,c.kt)("h2",{id:"overview"},"Overview"),(0,c.kt)("p",null,"Cancer Hotspots, a resource for statistically significant mutations in cancer. It provides information about statistically significantly recurrent mutations identified in large scale cancer genomics data."),(0,c.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,c.kt)("div",{parentName:"div",className:"admonition-heading"},(0,c.kt)("h5",{parentName:"div"},(0,c.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,c.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,c.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,c.kt)("div",{parentName:"div",className:"admonition-content"},(0,c.kt)("p",{parentName:"div"},"Chang MT, Bhattarai TS, Schram AM, Bielski CM, Donoghue MTA, Jonsson P, Chakravarty D, Phillips S, Kandoth C, Penson A, Gorelick A, Shamu T, Patel S, Harris C, Gao J, Sumer SO, Kundra R, Razavi P, Li BT, Reales DN, Socci ND, Jayakumaran G, Zehir A, Benayed R, Arcila ME, Chandarlapaty S, Ladanyi M, Schultz N, Baselga J, 
Berger MF, Rosen N, Solit DB, Hyman DM, Taylor BS. Accelerating Discovery of Functional Mutant Alleles in Cancer. Cancer Discov. 2018 Feb;8(2):174-183. doi: 10.1158/2159-8290.CD-17-0321. Epub 2017 Dec 15. PMID: 29247016; PMCID: PMC5809279."),(0,c.kt)("p",{parentName:"div"},"Chang MT, Asthana S, Gao SP, Lee BH, Chapman JS, Kandoth C, Gao J, Socci ND, Solit DB, Olshen AB, Schultz N, Taylor BS. Identifying recurrent mutations in cancer reveals widespread lineage diversity and mutational specificity. Nat Biotechnol. 2016 Feb;34(2):155-63. doi: 10.1038/nbt.3391. Epub 2015 Nov 30. PMID: 26619011; PMCID: PMC4744099."))),(0,c.kt)("h2",{id:"data-extraction"},"Data extraction"),(0,c.kt)("p",null,"Illumina Connected Annotations currently parses SNV and indel tabs from hotspots_v2.xls file to extract the relevant content."),(0,c.kt)("h3",{id:"example"},"Example"),(0,c.kt)("h4",{id:"snv"},"SNV"),(0,c.kt)("pre",null,(0,c.kt)("code",{parentName:"pre",className:"language-scss"},'Hugo_Symbol Amino_Acid_Position log10_pvalue Mutation_Count Reference_Amino_Acid Total_Mutations_in_Gene Median_Allele_Freq_Rank Allele_Freq_Rank Variant_Amino_Acid Codon_Change Genomic_Position Detailed_Cancer_Types Organ_Types Tri-nucleotides Mutability mu_protein Total_Samples Analysis_Type qvalue tm qvalue_pancanIs_repeat seq length align100 pad12entropy pad24entropy pad36entropy TP reason n_MSK n_Retro judgement inNBT inOncokb ref qvaluect ct Samples\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 
295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.062076
7494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 R:204 
cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:88|thyroid:54|blood:15|bowel:8|testis:5|biliarytract:4|bladder:4|lung:4|ovaryfallopiantube:4|softtissue:3|unk:3|uterus:3|cnsbrain:2|esophagusstomach:2|headandneck:2|bone:1|pancreas:1|thymus:1\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 
295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.062076
7494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 K:142 
cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:62|bowel:18|thyroid:17|blood:12|softtissue:6|lung:5|unk:5|bladder:3|cnsbrain:2|thymus:2|adrenalgland:1|biliarytract:1|esophagusstomach:1|headandneck:1|kidney:1|liver:1|ovaryfallopiantube:1|pancreas:1|testis:1|uterus:1\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 
295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.062076
7494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 L:46 
cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:24|bowel:7|lung:6|blood:2|cnsbrain:2|unk:2|bladder:1|softtissue:1|uterus:1\nNRAS 61 -1237.69143477067 422 Q:422 620 0.333333333333333 
295|0.692307692307692:0.733333333333333:0.2:0.933333333333333:1:0.25:0.666666666666667:1:0.25:0.571428571428571:1:1:0.5:0.363636363636364:0.428571428571429:0.0833333333333333:1:1:1:1:0.5:1:0.125:0.363636363636364:0.173913043478261:0.25:1:0.8:0.153846153846154:0.857142857142857:0.5:0.5:0.5:1:0.272727272727273:0.214285714285714:1:0.5:1:1:0.2:0.333333333333333:0.6875:0.708333333333333:0.25:0.266666666666667:0.111111111111111:1:1:0.333333333333333:0.428571428571429:0.666666666666667:0.25:0.5:0.833333333333333:0.5:0.735294117647059:0.0476190476190476:0.1:0.133333333333333:0.230769230769231:0.25:1:0.5:0.294117647058824:0.217391304347826:0.46875:0.5:1:0.2:0.166666666666667:0.666666666666667:1:0.8:0.407407407407407:1:0.0212765957446809:0.285714285714286:0.0909090909090909:0.333333333333333:0.2:0.333333333333333:0.5:0.5:1:0.111111111111111:0.5:0.903846153846154:0.5:0.2:1:1:0.0909090909090909:0.4:0.428571428571429:0.0625:0.25:0.833333333333333:1:0.956521739130435:0.111111111111111:0.6:0.212765957446809:0.5:0.207547169811321:1:0.75:0.294117647058824:0.666666666666667:1:0.333333333333333:0.714285714285714:0.142857142857143:1:0.3:0.416666666666667:0.272727272727273:0.25:0.333333333333333:0.345454545454545:0.0952380952380952:0.166666666666667:0.111111111111111:0.454545454545455:0.0666666666666667:1:0.636363636363636:0.636363636363636:0.25:0.272727272727273:0.824324324324324:1:0.75:0.545454545454545:1:1:0.0769230769230769:0.363636363636364:0.290322580645161:0.333333333333333:0.179487179487179:1:0.0666666666666667:0.333333333333333:1:0.478260869565217:0.166666666666667:1:1:0.0276497695852535:0.0716845878136201:0.0263736263736264:0.933333333333333:1:0.5:1:1:0.8125:0.361788617886179:0.113761467889908:0.113761467889908:0.157894736842105:0.333333333333333:0.0555555555555556:0.0357142857142857:0.375:0.111111111111111:0.584415584415584:0.0350877192982456:0.751111111111111:0.761245674740484:0.164989939637827:0.196652719665272:0.135549872122762:0.172113289760349:0.0240963855421687:0.062076
7494356659:0.142268041237113:0.147441457068517:0.147959183673469:0.038961038961039:0.686274509803922:0.0929054054054054:0.364787111622555:0.331306990881459:0.691449814126394:0.691449814126394:0.0769230769230769:0.347826086956522:0.117647058823529:0.148148148148148:0.05:0.290030211480363:0.680272108843537:0.188679245283019:0.0701754385964912:0.801526717557252:0.236842105263158:0.1953125:0.0539906103286385:0.015625:0.0390492359932088:0.00790513833992095:0.0597826086956522:0.136783733826248:0.362359550561798:0.0713719270420301:0.328621908127208:0.0657672849915683:0.320099255583127:0.075:0.433021806853583:0.524818401937046:0.524818401937046:0.259259259259259:0.483695652173913:0.0269360269360269:0.100486223662885:0.785507246376812:0.137870855148342:0.472340425531915:0.194331983805668:0.0830769230769231:0.418055555555556:0.546296296296296:0.247596153846154:0.52:0.39832285115304:0.601866251944012:0.234016887816647:0.214007782101167:0.153153153153153:0.137180700094607:0.0666666666666667:0.037037037037037:0.1:0.2:0.458333333333333:0.0588235294117647:0.111111111111111:0.333333333333333:0.181818181818182:0.473684210526316:0.5:0.2:0.136363636363636:0.0769230769230769:0.142857142857143:0.285714285714286:0.25:0.445714285714286:0.149377593360996:0.0227790432801822:0.182278481012658:0.540123456790123:0.021505376344086:0.541666666666667:0.00429184549356223:0.473684210526316:0.103508771929825:0.0930232558139535:0.391304347826087:0.072:0.0113636363636364:0.148837209302326:0.448051948051948:0.761038961038961:0.530373831775701:0.222857142857143:0.433862433862434:0.0810810810810811:0.0723327305605787:0.410714285714286:0.247910863509749:0.384615384615385:0.125:0.24:0.783582089552239:0.0646651270207852:0.445569620253165:0.754777070063694:0.165137614678899:0.10732538330494:0.0375:0.538461538461538:0.0981387478849408:0.029126213592233:0.0833333333333333:0.443514644351464:0.0917431192660551:0.03125:0.674418604651163:0.3125:0.375:0.314285714285714 H:27 
cAa/cGa:203|Caa/Aaa:140|cAa/cTa:46|caA/caT:14|caA/caC:13|ggACaa/ggCAaa:2|cAa/cCa:2|Caa/Taa:1|CAa/AGa:1 1:115256529_252|1:115256530_143|1:115256528_27 skcm:787:186|thpa:486:43|mm:275:27|thpd:58:18|coadread:683:16|luad:2057:15|coad:712:13|mup:42:7|aml:198:6|blca:852:5|thap:33:5|read:149:5|rms:50:5|uec:339:5|nsgct:152:5|cll:283:4|ihch:104:4|lgsoc:17:3|sem:59:3|thhc:21:3|erms:8:3|lggnos:544:3|utuc:76:2|cup:135:2|thfo:5:2|sarcl:13:2|mfh:53:2|gbm:688:2|soc:468:2|stad:748:2|thym:125:2|es:229:1|npc:66:1|unk:146:1|panet:86:1|hnsc:643:1|armm:21:1|tmt:3:1|acrm:23:1|thyc:9:1|odg:36:1|paasc:8:1|hnmucm:11:1|blad:7:1|esca:556:1|mixed:3:1|chol:152:1|hcc:620:1|sarc:280:1|chrcc:88:1|aca:93:1 skin:974:187|thyroid:618:71|blood:890:37|bowel:1782:35|lung:2761:17|unk:357:11|softtissue:739:11|testis:217:9|bladder:958:8|cnsbrain:2270:6|ovaryfallopiantube:699:5|biliarytract:358:5|uterus:618:5|headandneck:988:3|thymus:162:3|esophagusstomach:1407:3|pancreas:1059:2|bone:297:1|liver:636:1|kidney:1304:1|adrenalgland:291:1 TTG|ACA|CTT|TCG|CCC|CCA 0.0120300464273379 0.0267810594223141 24592 "pancan,skin,thyroid,bowel,blood,lung,softtissue,testis,bladder,cnsbrain,biliarytract,ovaryfallopiantube,uterus,thymus,headandneck,esophagusstomach" 0 NRAS 61 0 FALSE NA 1 1.16795714944678 1.26187131041539 1.29838371117394 TRUE 165 257 RETAIN TRUE TRUE Q 0 skin skin:12|blood:7|bowel:2|lung:2|testis:2|softtissue:1|unk:1\n')),(0,c.kt)("h4",{id:"indel"},"Indel"),(0,c.kt)("pre",null,(0,c.kt)("code",{parentName:"pre",className:"language-scss"},"Hugo_Symbol Amino_Acid_Position log10_pvalue Mutation_Count Reference_Amino_Acid Total_Mutations_in_Gene Median_Allele_Freq_Rank Allele_Freq_Rank SNP_ID Variant_Amino_Acid Codon_Change Genomic_Position Detailed_Cancer_Types Organ_Types Tri-nucleotides Mutability mu_protein ccf Total_Samples indel_size qvalue tm Is_repeat seq length align100 pad12entropy pad24entropy pad36entropy TP reason n_MSK n_Retro judgement inNBT inOncokb Samples\nSMARCA4 546 -7.75235638169585 5 QK:5 101 
NA NA :NA K546del:5 cAGAag/cag:5 19:11106926_5 lgg:536:4|dlbcl:246:1 cnsbrain:2283:4|lymph:366:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 1 0.000230672905611517 SMARCA4 546 FALSE NA NA 1 0.91489630957268 1.2950060272429 1.33965330506364 FALSE LOCAL_ENTROPY 1 4 RETAIN FALSE FALSE cnsbrain:4|lymph:1\nCDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA V28_E33del:4 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 1 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE cervix:1|esophagusstomach:1|lung:1|pancreas:1\nCDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA L32_L37del:3 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 1 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE skin:2|esophagusstomach:1\nCDKN2A 27-42 -6.82111516846557 12 VRALLEA:4|LEAGALP:3|ALPN:1|EV:1|GA:1|PNAPN:1|RALLEA:1 219 NA NA :NA 
A36_N39delinsD:1 gTGCGGGCGCTGCTGGAGGcg/gcg:4|cTGGAGGCGGGGGCGCTGCcc/ccc:3|GGGGCG/-:1|gCGCTGCCCAac/gac:1|gAGGtg/gtg:1|CGGGCGCTGCTGGAGGCG/-:1|ccCAACGCACCGAAt/cct:1 9:21974727_4|9:21974715_3|9:21974745_1|9:21974725_1|9:21974719_1|9:21974712_1|9:21974702_1 luad:2071:3|esca:556:2|blca:852:1|skcm:192:1|icemu:1:1|paad:932:1|mel:595:1|stad:748:1|hnsc:650:1 esophagusstomach:1413:3|lung:2767:3|skin:974:2|bladder:955:1|cervix:234:1|pancreas:1059:1|headandneck:988:1 NA 0.0573226243518208 0.0473351872460284 NA 24592 15 8.77193090544841e-05 CDKN2A 27-42 FALSE NA NA 0.857780912379927 1.13008762297022 1.1577633500238 FALSE LOCAL_ENTROPY 6 6 RETAIN FALSE FALSE lung:1\n")),(0,c.kt)("h3",{id:"parsing"},"Parsing"),(0,c.kt)("p",null,"From the file, we're mainly interested in the following columns:"),(0,c.kt)("ul",null,(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"Hugo_Symbol")),(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"Amino_Acid_Position")),(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"Mutation_Count")),(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"Reference_Amino_Acid")),(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"Variant_Amino_Acid")),(0,c.kt)("li",{parentName:"ul"},(0,c.kt)("inlineCode",{parentName:"li"},"qvalue"))),(0,c.kt)("p",null,"We map the gene symbol onto the canonical transcripts (RefSeq & Ensembl) for that gene. For SNVs, we obtain position, ref and alt amino acid from source file and generate substitution notation. 
For indels, we get protein change notation from ",(0,c.kt)("inlineCode",{parentName:"p"},"Reference_Amino_Acid")," column.\nThen we match each entry using these notations."),(0,c.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,c.kt)("div",{parentName:"div",className:"admonition-heading"},(0,c.kt)("h5",{parentName:"div"},(0,c.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,c.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,c.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,c.kt)("div",{parentName:"div",className:"admonition-content"},(0,c.kt)("p",{parentName:"div"},"We currently skip all variants labeled as splice from the source"))),(0,c.kt)("h2",{id:"json-output"},"JSON Output"),(0,c.kt)("p",null,"The data source will be captured under the cancerHotspots key in the transcript section."),(0,c.kt)("pre",null,(0,c.kt)("code",{parentName:"pre",className:"language-json",metastring:"{13-18}","{13-18}":!0},'{\n "transcript":"NM_002524.5",\n "source":"RefSeq",\n "bioType":"mRNA",\n "aminoAcids":"Q/K",\n "proteinPos":"61",\n "geneId":"4893",\n "hgnc":"NRAS",\n "hgvsc":"NM_002524.5:c.181C>A",\n "hgvsp":"NP_002515.1:p.(Gln61Lys)",\n "isCanonical":true,\n "proteinId":"NP_002515.1",\n "cancerHotspots":{\n "residue":"Q61",\n "numSamples":422,\n "numAltAminoAcidSamples":142,\n "qValue":0\n 
}\n}\n')),(0,c.kt)("table",null,(0,c.kt)("thead",{parentName:"table"},(0,c.kt)("tr",{parentName:"thead"},(0,c.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,c.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,c.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,c.kt)("tbody",{parentName:"table"},(0,c.kt)("tr",{parentName:"tbody"},(0,c.kt)("td",{parentName:"tr",align:"left"},"residue"),(0,c.kt)("td",{parentName:"tr",align:"center"},"string"),(0,c.kt)("td",{parentName:"tr",align:"left"})),(0,c.kt)("tr",{parentName:"tbody"},(0,c.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,c.kt)("td",{parentName:"tr",align:"center"},"int"),(0,c.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant at the same amino acid position")),(0,c.kt)("tr",{parentName:"tbody"},(0,c.kt)("td",{parentName:"tr",align:"left"},"numAltAminoAcidSamples"),(0,c.kt)("td",{parentName:"tr",align:"center"},"int"),(0,c.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant with the same position and alternate amino acid position")),(0,c.kt)("tr",{parentName:"tbody"},(0,c.kt)("td",{parentName:"tr",align:"left"},"qValue"),(0,c.kt)("td",{parentName:"tr",align:"center"},"double"),(0,c.kt)("td",{parentName:"tr",align:"left"})))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9b81ec20.8d4ae0bc.js b/assets/js/9b81ec20.8d4ae0bc.js deleted file mode 100644 index 4e04283a..00000000 --- a/assets/js/9b81ec20.8d4ae0bc.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2883],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>d});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return 
Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var i=r.createContext({}),m=function(t){var e=r.useContext(i),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=m(t.components);return r.createElement(i.Provider,{value:e},t.children)},s="mdxType",f={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,l=t.originalType,i=t.parentName,c=p(t,["components","mdxType","originalType","parentName"]),s=m(n),u=a,d=s["".concat(i,".").concat(u)]||s[u]||f[u]||l;return n?r.createElement(d,o(o({ref:e},c),{},{components:n})):r.createElement(d,o({ref:e},c))}));function d(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=u;var p={};for(var i in e)hasOwnProperty.call(e,i)&&(p[i]=e[i]);p.originalType=t,p[s]="string"==typeof t?t:a,o[1]=p;for(var m=2;m{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>p,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,p={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.14/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.14",frontMatter:{}},i=[],m={toc:i},c="wrapper";function 
s(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. 
Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. 
Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9c9c1436.4617ec66.js b/assets/js/9c9c1436.4617ec66.js deleted file mode 100644 index 61fff27a..00000000 --- a/assets/js/9c9c1436.4617ec66.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7120,1865],{3905:(e,n,t)=>{t.d(n,{Zo:()=>c,kt:()=>g});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function l(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var o=a.createContext({}),p=function(e){var n=a.useContext(o),t=n;return e&&(t="function"==typeof e?e(n):l(l({},n),e)),t},c=function(e){var n=p(e.components);return a.createElement(o.Provider,{value:n},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},u=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,r=e.originalType,o=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),m=p(t),u=i,g=m["".concat(o,".").concat(u)]||m[u]||d[u]||r;return t?a.createElement(g,l(l({ref:n},c),{},{components:t})):a.createElement(g,l({ref:n},c))}));function g(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,l=new Array(r);l[0]=u;var s={};for(var o in n)hasOwnProperty.call(n,o)&&(s[o]=n[o]);s.originalType=e,s[m]="string"==typeof e?e:i,l[1]=s;for(var p=2;p{t.r(n),t.d(n,{contentTitle:()=>l,default:()=>m,frontMatter:()=>r,metadata:()=>s,toc:()=>o});var 
a=t(87462),i=(t(67294),t(3905));const r={},l=void 0,s={unversionedId:"data-sources/clinvar-json",id:"version-3.18/data-sources/clinvar-json",title:"clinvar-json",description:"small variants:",source:"@site/versioned_docs/version-3.18/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clinvar-json.md",tags:[],version:"3.18",frontMatter:{}},o=[],p={toc:o},c="wrapper";function m(e){let{components:n,...t}=e;return(0,i.kt)(c,(0,a.Z)({},p,t,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"small variants:")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"large variants:")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n 
"pathogenic"\n ], \n "lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n "significance":[\n "pathogenic"\n ],\n "lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"variant type")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no assertion provided"),(0,i.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,i.kt)("li",{parentName:"ul"},"practice guideline"),(0,i.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"unknown"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"germline"),(0,i.kt)("li",{parentName:"ul"},"somatic"),(0,i.kt)("li",{parentName:"ul"},"inherited"),(0,i.kt)("li",{parentName:"ul"},"paternal"),(0,i.kt)("li",{parentName:"ul"},"maternal"),(0,i.kt)("li",{parentName:"ul"},"de-novo"),(0,i.kt)("li",{parentName:"ul"},"biparental"),(0,i.kt)("li",{parentName:"ul"},"uniparental"),(0,i.kt)("li",{parentName:"ul"},"not-tested"),(0,i.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"uncertain significance"),(0,i.kt)("li",{parentName:"ul"},"not provided"),(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely 
pathogenic"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"drug response"),(0,i.kt)("li",{parentName:"ul"},"histocompatibility"),(0,i.kt)("li",{parentName:"ul"},"association"),(0,i.kt)("li",{parentName:"ul"},"risk factor"),(0,i.kt)("li",{parentName:"ul"},"protective"),(0,i.kt)("li",{parentName:"ul"},"affects"),(0,i.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,i.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}m.isMDXComponent=!0},46745:(e,n,t)=>{t.r(n),t.d(n,{contentTitle:()=>s,default:()=>d,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=t(87462),i=(t(67294),t(3905)),r=t(41384);const l={title:"ClinVar"},s=void 0,o={unversionedId:"data-sources/clinvar",id:"version-3.18/data-sources/clinvar",title:"ClinVar",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/clinvar.mdx",sourceDirName:"data-sources",slug:"/data-sources/clinvar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clinvar",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/clinvar.mdx",tags:[],version:"3.18",frontMatter:{title:"ClinVar"},sidebar:"docs",previous:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen"},next:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/cosmic"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"RCV File",id:"rcv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Parsing Significance",id:"parsing-significance",children:[],level:4}],level:3}],level:2},{value:"VCV 
File",id:"vcv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[{value:"Source data files",id:"source-data-files",children:[],level:3}],level:2}],c={toc:p},m="wrapper";function d(e){let{components:n,...l}=e;return(0,i.kt)(m,(0,a.Z)({},c,l,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, 
George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", ",(0,i.kt)("strong",{parentName:"p"},"46"),", Issue D1, 4 January 2018, Pages D1062\u2013D1067, ",(0,i.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/nar/gkx1153"},"https://doi.org/10.1093/nar/gkx1153")))),(0,i.kt)("h2",{id:"rcv-file"},"RCV File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{target:"_blank",href:t(22367).Z},"a full RCV entry"),"."),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ID")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3}","{3}":!0},'\n \n \n\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"LastUpdatedDate")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},'\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{5}","{5}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ReviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Phenotypes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2-8}","{2-8}":!0},'\n \n \n \n Joubert syndrome 9\n \n \n \n\n')),(0,i.kt)("p",null,'We only use the 
field with Type="Preferred". Multiple phenotypes may be reported'),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Location, Variant Type and Variant Id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3-12}","{3-12}":!0},'\n\n \n \n \n \n \n \n \n\n')),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"The variant position is extracted from the fields for their respective assemblies."),(0,i.kt)("li",{parentName:"ul"},"Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant."),(0,i.kt)("li",{parentName:"ul"},'For older records, since "start\' and "stop" fields are not always available, we use the "display_start" and "display_end" fields.'),(0,i.kt)("li",{parentName:"ul"},"If a required allele is not available, we extract it from the reference sequence."),(0,i.kt)("li",{parentName:"ul"},"Only variants having a dbSNP id are extracted."),(0,i.kt)("li",{parentName:"ul"},"Note that a ClinVar accession may have multiple variants associated with it (possible in different locations)"),(0,i.kt)("li",{parentName:"ul"},"VariantId is extracted from the MeasureSet attributes."),(0,i.kt)("li",{parentName:"ul"},"VariantType is extracted from the Measure attributes.",(0,i.kt)("div",{parentName:"li",className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"unsupported 
variant types")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"We currently don't support the following variant types:"),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"Microsatellite"),(0,i.kt)("li",{parentName:"ul"},"protein only"),(0,i.kt)("li",{parentName:"ul"},"fusion"),(0,i.kt)("li",{parentName:"ul"},"Complex"),(0,i.kt)("li",{parentName:"ul"},"Variation"),(0,i.kt)("li",{parentName:"ul"},"Translocation ")))))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"MedGen, OMIM, Orphanet IDs")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4-7}","{4-7}":!0},'\n \n \n \n \n \n \n \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"AlleleOrigins")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},"\n germline\n\n")),(0,i.kt)("p",null,"We only extract all Allele Origins from Submissions (SCV) entries."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"PubMedIds")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4,10,16,21}","{4,10,16,21}":!0},'\n \n \n 12114475\n \n \n \n LMM Criteria\n \n 24033266\n \n \n \n \n \n 9113933\n \n \n \n \n 23757202\n \n\n')),(0,i.kt)("p",null,"We only extract all Pubmed Ids from Submissions (SCV) entries."),(0,i.kt)("h4",{id:"parsing-significance"},"Parsing Significance"),(0,i.kt)("p",null,"Extracting significance(s) may involve parsing multiple fields. 
Take the following snippets into consideration."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,8,13-14}","{3,8,13-14}":!0},'\n no assertion criteria provided\n Pathogenic\n\n\n\n criteria provided, multiple submitters, no conflicts\n Pathogenic/Likely pathogenic\n\n\n\n no assertion criteria provided\n Conflicting interpretations of pathogenicity\n Pathogenic(1);Uncertain significance(1)\n\n')),(0,i.kt)("p",null,"Given the evidence, we converted the significance field into an array of strings which may be parsed out of the ",(0,i.kt)("inlineCode",{parentName:"p"},"Descriptions")," or ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," fields."),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Varying Delimiters")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The delimiters in each field may vary. Currently, the delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Description")," are ",(0,i.kt)("inlineCode",{parentName:"p"},",")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),". 
The delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," are ",(0,i.kt)("inlineCode",{parentName:"p"},";")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),"."))),(0,i.kt)("h2",{id:"vcv-file"},"VCV File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n\n\n current\n Homo sapiens\n \n \n \n \n \n 1p36.31\n \n \n \n 601142\n \n \n \n 1p36.31\n \n \n \n 607215\n \n \n GRCh37/hg19 1p36.31(chr1:6051187-6158763)\n copy number gain\n \n 1p36.31\n \n \n \n no interpretation for the single variant\n \n \n \n \n \n \n no interpretation for the single variant\n \n \n no interpretation for the single variant\n \n \n \n \n \n \n \n \n \n\n\n')),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{7}","{7}":!0},'\n \n \n \n \n \n no interpretation for the single variant\n \n \n \n \n \n\n')),(0,i.kt)("p",null,"May have multiple significances listed."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},"\n \n \n no interpretation for the single variant\n \n \n\n")),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"The XML file contains ~1k more entries (out of 162K) than the VCF file"),(0,i.kt)("li",{parentName:"ul"},"The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF"),(0,i.kt)("li",{parentName:"ul"},'The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H",\netc.) 
as their alternate allele')))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz"},"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz")),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz"},"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz")),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The ClinVar ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," and ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," for Nirvana can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"clinvar")," subcommand."),(0,i.kt)("h3",{id:"source-data-files"},"Source data files"),(0,i.kt)("p",null,"Two input ",(0,i.kt)("inlineCode",{parentName:"p"},".xml")," files and a ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file are required in order to build the ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," and ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," file. 
You should have the following files:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"ClinVarFullRelease_00-latest.xml.gz ClinVarVariationRelease_00-latest.xml.gz\nClinVarFullRelease_00-latest.xml.gz.version\n")),(0,i.kt)("p",null,"The version file is a text file with the follwoing format."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinVar\nVERSION=20220505\nDATE=2022-05-05\nDESCRIPTION=A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence\n")),(0,i.kt)("p",null,"The help menu for the utility is as follows:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll clinvar\n---------------------------------------------------------------------------\nSAUtils (c) 2022 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.18.1\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll clinvar [options]\nCreates a supplementary database with ClinVar annotations\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --rcv, -i ClinVar Full release XML file\n --vcv, -c ClinVar Variation release XML file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll clinvar\n")),(0,i.kt)("p",null,"Here is a sample execution:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet ~/development/Nirvana/bin/Debug/net6.0/SAUtils.dll clinvar \\\\\n--ref ~/development/References/7/Homo_sapiens.GRCh38.Nirvana.dat --rcv ClinVarFullRelease_00-latest.xml.gz \\\\\n--vcv ClinVarVariationRelease_00-latest.xml.gz --out ~/development/SupplementaryDatabase/63/GRCh38\n---------------------------------------------------------------------------\nSAUtils (c) 2022 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 
3.18.1\n---------------------------------------------------------------------------\n\nFound 1535677 VCV records\nUnknown vcv id:225946 found in RCV000211201.2\nUnknown vcv id:225946 found in RCV000211253.2\nUnknown vcv id:225946 found in RCV000211375.2\nUnknown vcv id:976117 found in RCV001253316.1\nUnknown vcv id:1321016 found in RCV001776995.2\n3 unknown VCVs found in RCVs.\n225946,976117,1321016\n0 unknown VCVs found in RCVs.\nChromosome 1 completed in 00:00:15.1\nChromosome 2 completed in 00:00:20.0\nChromosome 3 completed in 00:00:09.7\nChromosome 4 completed in 00:00:05.9\nChromosome 5 completed in 00:00:09.8\nChromosome 6 completed in 00:00:08.3\nChromosome 7 completed in 00:00:08.7\nChromosome 8 completed in 00:00:06.2\nChromosome 9 completed in 00:00:08.6\nChromosome 10 completed in 00:00:07.0\nChromosome 11 completed in 00:00:11.7\nChromosome 12 completed in 00:00:08.0\nChromosome 13 completed in 00:00:06.3\nChromosome 14 completed in 00:00:06.0\nChromosome 15 completed in 00:00:06.6\nChromosome 16 completed in 00:00:10.8\nChromosome 17 completed in 00:00:13.8\nChromosome 18 completed in 00:00:02.9\nChromosome 19 completed in 00:00:08.7\nChromosome 20 completed in 00:00:03.6\nChromosome 21 completed in 00:00:02.4\nChromosome 22 completed in 00:00:03.6\nChromosome MT completed in 00:00:00.2\nChromosome X completed in 00:00:07.5\nChromosome Y completed in 00:00:00.0\nMaximum bp shifted for any variant:2\nWriting 37097 intervals to database...\n\nTime: 00:13:26.9\n\n")))}d.isMDXComponent=!0},22367:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/files/clinvar-rcv-example-4e0a2f2ac6c70acd0ce41410690b683b.xml"}}]); \ No newline at end of file diff --git a/assets/js/9e4087bc.05e20624.js b/assets/js/9e4087bc.05e20624.js new file mode 100644 index 00000000..467c243d --- /dev/null +++ b/assets/js/9e4087bc.05e20624.js @@ -0,0 +1 @@ +"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3608],{3012:(e,t,a)=>{a.r(t),a.d(t,{default:()=>s});var r=a(7294),n=a(8882),l=a(9960),c=a(5999);function i(e){let{year:t,posts:a}=e;return r.createElement(r.Fragment,null,r.createElement("h3",null,t),r.createElement("ul",null,a.map((e=>r.createElement("li",{key:e.metadata.date},r.createElement(l.Z,{to:e.metadata.permalink},e.metadata.formattedDate," - ",e.metadata.title))))))}function m(e){let{years:t}=e;return r.createElement("section",{className:"margin-vert--lg"},r.createElement("div",{className:"container"},r.createElement("div",{className:"row"},t.map(((e,t)=>r.createElement("div",{key:t,className:"col col--4 margin-vert--lg"},r.createElement(i,e)))))))}function s(e){let{archive:t}=e;const a=(0,c.I)({id:"theme.blog.archive.title",message:"Archive",description:"The page & hero title of the blog archive page"}),l=(0,c.I)({id:"theme.blog.archive.description",message:"Archive",description:"The page & hero description of the blog archive page"}),i=function(e){const t=e.reduceRight(((e,t)=>{const a=t.metadata.date.split("-")[0],r=e.get(a)||[];return e.set(a,[t,...r])}),new Map);return Array.from(t,(e=>{let[t,a]=e;return{year:t,posts:a}}))}(t.blogPosts);return r.createElement(n.Z,{title:a,description:l},r.createElement("header",{className:"hero hero--primary"},r.createElement("div",{className:"container"},r.createElement("h1",{className:"hero__title"},a),r.createElement("p",{className:"hero__subtitle"},l))),r.createElement("main",null,i.length>0&&r.createElement(m,{years:i})))}}}]); \ No newline at end of file diff --git a/assets/js/9e4087bc.41610d4e.js b/assets/js/9e4087bc.41610d4e.js deleted file mode 100644 index af694a27..00000000 --- a/assets/js/9e4087bc.41610d4e.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3608],{63012:(e,t,a)=>{a.r(t),a.d(t,{default:()=>s});var 
r=a(67294),n=a(18882),l=a(39960),c=a(95999);function i(e){let{year:t,posts:a}=e;return r.createElement(r.Fragment,null,r.createElement("h3",null,t),r.createElement("ul",null,a.map((e=>r.createElement("li",{key:e.metadata.date},r.createElement(l.Z,{to:e.metadata.permalink},e.metadata.formattedDate," - ",e.metadata.title))))))}function m(e){let{years:t}=e;return r.createElement("section",{className:"margin-vert--lg"},r.createElement("div",{className:"container"},r.createElement("div",{className:"row"},t.map(((e,t)=>r.createElement("div",{key:t,className:"col col--4 margin-vert--lg"},r.createElement(i,e)))))))}function s(e){let{archive:t}=e;const a=(0,c.I)({id:"theme.blog.archive.title",message:"Archive",description:"The page & hero title of the blog archive page"}),l=(0,c.I)({id:"theme.blog.archive.description",message:"Archive",description:"The page & hero description of the blog archive page"}),i=function(e){const t=e.reduceRight(((e,t)=>{const a=t.metadata.date.split("-")[0],r=e.get(a)||[];return e.set(a,[t,...r])}),new Map);return Array.from(t,(e=>{let[t,a]=e;return{year:t,posts:a}}))}(t.blogPosts);return r.createElement(n.Z,{title:a,description:l},r.createElement("header",{className:"hero hero--primary"},r.createElement("div",{className:"container"},r.createElement("h1",{className:"hero__title"},a),r.createElement("p",{className:"hero__subtitle"},l))),r.createElement("main",null,i.length>0&&r.createElement(m,{years:i})))}}}]); \ No newline at end of file diff --git a/assets/js/9e8e0130.94e3ccaa.js b/assets/js/9e8e0130.94e3ccaa.js deleted file mode 100644 index 5966e117..00000000 --- a/assets/js/9e8e0130.94e3ccaa.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7418,9383],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var 
n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),m=d(n),u=l,v=m["".concat(s,".").concat(u)]||m[u]||p[u]||r;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,i=new Array(r);i[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:l,i[1]=o;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.2.5/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.2.5/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/dbsnp-json.md",tags:[],version:"3.2.5",frontMatter:{}},s=[],d={toc:s},c="wrapper";function m(e){let{components:t,...n}=e;return(0,l.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,l.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,l.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,l.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}m.isMDXComponent=!0},31953:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),l=(n(67294),n(3905)),r=n(59999);const i={title:"dbSNP"},o=void 
0,s={unversionedId:"data-sources/dbsnp",id:"version-3.2.5/data-sources/dbsnp",title:"dbSNP",description:"Overview",source:"@site/versioned_docs/version-3.2.5/data-sources/dbsnp.mdx",sourceDirName:"data-sources",slug:"/data-sources/dbsnp",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/dbsnp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/dbsnp.mdx",tags:[],version:"3.2.5",frontMatter:{title:"dbSNP"},sidebar:"version-3.2.5/docs",previous:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/clinvar"},next:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/gnomad"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Global allele extraction",id:"global-allele-extraction",children:[],level:4},{value:"Equal Allele Frequency Example (2 alleles)",id:"equal-allele-frequency-example-2-alleles",children:[],level:4},{value:"Equal Allele Frequency Example (3 alleles)",id:"equal-allele-frequency-example-3-alleles",children:[],level:4},{value:"Equal Allele Frequency in Alternate Alleles",id:"equal-allele-frequency-in-alternate-alleles",children:[],level:4},{value:"Equal Allele Frequency Between Reference & Alternate Allele",id:"equal-allele-frequency-between-reference--alternate-allele",children:[],level:4}],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},m="wrapper";function p(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"dbSNP contains human single nucleotide variations, 
microsatellites, and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP\u2014Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. ",(0,l.kt)("em",{parentName:"p"},"Genome Res."),", ",(0,l.kt)("strong",{parentName:"p"},"9"),", 677\u2013679."))),(0,l.kt)("h2",{id:"vcf-file"},"VCF File"),(0,l.kt)("h3",{id:"example"},"Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 10177 rs367896724 A AC . . 
RS=367896724;RSPOS=10177;dbSNPBuildID=138; \\ \n SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \\\n VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \\\n TOPMED=0.76728147298674821,0.23271852701325178\n")),(0,l.kt)("h3",{id:"parsing"},"Parsing"),(0,l.kt)("p",null,"From the VCF file, we're mainly interested in the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"rsID")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"ID")," field"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"CAF")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"INFO")," field")),(0,l.kt)("h4",{id:"global-allele-extraction"},"Global allele extraction"),(0,l.kt)("p",null,"The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values). 
"),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: Global Major Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele."))),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: 
Global Minor Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily."))),(0,l.kt)("h4",{id:"equal-allele-frequency-example-2-alleles"},"Equal Allele Frequency Example (2 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C CAF=0.5,0.5\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and C to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-example-3-alleles"},"Equal Allele Frequency Example (3 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.33,0.33,0.33\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-in-alternate-alleles"},"Equal Allele Frequency in Alternate Alleles"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.4,0.4\n")),(0,l.kt)("p",null,"We will select C or T to be arbitrarily assigned to be the global major or global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-between-reference--alternate-allele"},"Equal Allele Frequency Between Reference & Alternate Allele"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.2,0.6\n")),(0,l.kt)("p",null,"We will select T to be the global major allele and C to be the global minor allele."),(0,l.kt)("h2",{id:"known-issues"},"Known Issues"),(0,l.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are multiple entries with different CAF values for the same allele, we use the first CAF value."))),(0,l.kt)("h2",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nih.gov/snp/organisms/"},"https://ftp.ncbi.nih.gov/snp/organisms/")),(0,l.kt)("h2",{id:"json-output"},"JSON Output"),(0,l.kt)(r.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9f1b1c54.3d70b861.js b/assets/js/9f1b1c54.3d70b861.js deleted file mode 100644 index d29f0573..00000000 --- a/assets/js/9f1b1c54.3d70b861.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2154],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function s(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),l=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):s(s({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(i,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,s(s({ref:t},p),{},{components:n})):r.createElement(f,s({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,s=new Array(o);s[0]=m;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:a,s[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const o={},s=void 0,c={unversionedId:"data-sources/dbsnp-json",id:"version-3.16/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/dbsnp-json.md",tags:[],version:"3.16",frontMatter:{}},i=[],l={toc:i},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n 
"rs1042821"\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/9f26fc98.afe3ceff.js b/assets/js/9f26fc98.afe3ceff.js deleted file mode 100644 index ee4b60a6..00000000 --- a/assets/js/9f26fc98.afe3ceff.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3057],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>g});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),p=c(n),m=r,g=p["".concat(s,".").concat(m)]||p[m]||u[m]||i;return n?a.createElement(g,l(l({ref:t},d),{},{components:n})):a.createElement(g,l({ref:t},d))}));function g(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,l=new Array(i);l[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[p]="string"==typeof e?e:r,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},l=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.21/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],c={toc:s},d="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted"),(0,r.kt)("li",{parentName:"ul"},"no known disease relationship")))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a0ea1db7.96257407.js b/assets/js/a0ea1db7.96257407.js deleted file mode 100644 
index 6c742788..00000000 --- a/assets/js/a0ea1db7.96257407.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6132],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),c=p(n),u=r,f=c["".concat(s,".").concat(u)]||c[u]||d[u]||i;return n?a.createElement(f,o(o({ref:t},m),{},{components:n})):a.createElement(f,o({ref:t},m))}));function f(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,o[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>c,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/omim-json",id:"version-3.16/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.16/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/omim-json.md",tags:[],version:"3.16",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],p={toc:s},m="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a1a4db2c.6784d48d.js b/assets/js/a1a4db2c.6784d48d.js deleted file mode 100644 index 56da6220..00000000 --- a/assets/js/a1a4db2c.6784d48d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9214,5248],{3905:(t,e,a)=>{a.d(e,{Zo:()=>d,kt:()=>g});var n=a(67294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return 
r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),m=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},d=function(t){var e=m(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",c={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,d=o(t,["components","mdxType","originalType","parentName"]),s=m(a),N=r,g=s["".concat(p,".").concat(N)]||s[N]||c[N]||l;return a?n.createElement(g,i(i({ref:e},d),{},{components:a})):n.createElement(g,i({ref:e},d))}));function g(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=N;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[s]="string"==typeof t?t:r,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/fusioncatcher-json",id:"version-3.16/data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/fusioncatcher-json.md",tags:[],version:"3.16",frontMatter:{}},p=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],m={toc:p},d="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA oesophageal carcinomas"\n ]\n }\n ]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known germline data sources")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data 
sources")))),(0,r.kt)("h4",{id:"genes"},"genes"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"first"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"second"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are paralogs for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a readthrough event (both are on the same strand and there are no genes between 
them)")))),(0,r.kt)("h4",{id:"gene"},"gene"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}s.isMDXComponent=!0},95411:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>o,default:()=>c,frontMatter:()=>i,metadata:()=>p,toc:()=>m});var n=a(87462),r=(a(67294),a(3905)),l=a(99284);const i={title:"FusionCatcher"},o=void 0,p={unversionedId:"data-sources/fusioncatcher",id:"version-3.16/data-sources/fusioncatcher",title:"FusionCatcher",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/fusioncatcher.mdx",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/fusioncatcher",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/fusioncatcher.mdx",tags:[],version:"3.16",frontMatter:{title:"FusionCatcher"},sidebar:"version-3.16/docs",previous:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/dbsnp"},next:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/gnomad"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Supported Data 
Sources",id:"supported-data-sources",children:[{value:"Oncogenes",id:"oncogenes",children:[],level:3},{value:"Germline",id:"germline",children:[],level:3},{value:"Somatic",id:"somatic",children:[],level:3}],level:2},{value:"Gene Pair TSV File",id:"gene-pair-tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Gene TSV File",id:"gene-tsv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:m},s="wrapper";function c(t){let{components:e,...a}=t;return(0,r.kt)(s,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://github.com/ndaniel/fusioncatcher"},"FusionCatcher")," is a well-known tool that searches for somatic novel/known fusion genes, translocations, and/or chimeras in RNA-seq data. 
While FusionCatcher itself is not part of Nirvana, we have included a subset of their genomic databases in Nirvana."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Daniel Nicorici, Mihaela \u015eatalan, Henrik Edgren, Sara Kangaspeska, Astrid Murum\xe4gi, Olli Kallioniemi, Sami Virtanen, Olavi Kilkku. (2014) ",(0,r.kt)("a",{parentName:"p",href:"https://www.biorxiv.org/content/10.1101/011650v1"},"FusionCatcher \u2013 a tool for finding somatic fusion genes in paired-end RNA-sequencing data"),". 
",(0,r.kt)("em",{parentName:"p"},"bioRxiv")," 011650"))),(0,r.kt)("h2",{id:"supported-data-sources"},"Supported Data Sources"),(0,r.kt)("h3",{id:"oncogenes"},"Oncogenes"),(0,r.kt)("p",null,"The following data sources are aggregated and used to populate the ",(0,r.kt)("inlineCode",{parentName:"p"},"isOncogene")," field in the gene JSON object:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bushman"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.bushmanlab.org/links/genelists"},"bushmanlab.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cancer_genes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ONGENE"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S1673852716302053"},"JGG")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://ongene.bioinfo-minzhao.org"},"bioinfo-minzhao.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"oncogenes_more.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"UniProt tumor 
genes"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/49/D1/D480/6006196"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.uniprot.org/downloads"},"uniprot.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tumor_genes.txt")))),(0,r.kt)("h3",{id:"germline"},"Germline"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Nirvana label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"1000 Genomes Project"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0104567"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"1000genomes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy (strong support)"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"banned.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Illumina Body Map 
2.0"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-513"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"bodymap2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CACG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S0888754312000821"},"Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"cacg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ConjoinG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0013284"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"conjoing.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy prefrontal cortex"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcmedgenomics.biomedcentral.com/articles/10.1186/s12920-016-0164-y"},"BMC Medical Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE68719"},"NCBI GEO")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cortex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Duplicated Genes Database"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0050653"},"PLOS 
ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://dgd.genouest.org/"},"genouest.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"dgd.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"GTEx healthy tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://gtexportal.org/home/"},"gtexportal.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"gtex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"healthy.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Human Protein Atlas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.mcponline.org/article/S1535-9476(20)34633-8/fulltext"},"MCP")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-1733/"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"hpa.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Babiceanu non-cancer tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-cancer_tissues.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor cell 
lines"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor_cells.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions normal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-normal.txt")))),(0,r.kt)("h3",{id:"somatic"},"Somatic"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Nirvana label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Alaei-Mahabadi 18 cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.pnas.org/content/113/48/13768.long"},"PNAS")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"18cancers.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"DepMap CCLE"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://depmap.org/portal/download/"},"depmap.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE Klijn"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080"},"Nature 
Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080#Sec27"},"Nature Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Cancer Genome Project"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cgp.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerKB 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4kb.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerPub 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4pub.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerSeq 
4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4seq.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"COSMIC"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cosmic.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bao gliomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genome.cshlp.org/content/24/11/1765"},"Genome Research")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"gliomas.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Known"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"known.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Mitelman DB"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://mitelmandatabase.isb-cgc.org"},"ISB-CGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://storage.cloud.google.com/mitelman-data-files/prod/mitelman_db.zip"},"Google Cloud")),(0,r.kt)("td",{parentName:"tr",align:"left"},"mitelman.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA oesophageal 
carcinomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature20805"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"oesophagus.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bailey pancreatic cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965#Sec44"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pancreases.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"PCAWG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2018.03.042"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://dcc.icgc.org/releases/PCAWG/transcriptome/fusion"},"ICGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pcawg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Robinson prostate 
cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2015.05.001"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell/fulltext/S0092-8674(15)00548-6?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0092867415005486%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"prostate_cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cancer.gov/about-nci/organization/ccg/research/structural-genomics/tcga"},"cancer.gov")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions tumor"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA Gao"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.celrep.2018.03.050"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell-reports/fulltext/S2211-1247(18)30395-4?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS2211124718303954%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA 
Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TICdb"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-8-33"},"BMC Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genetica.unav.edu/TICdb/allseqs_TICdb.txt"},"unav.edu")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ticdb.txt")))),(0,r.kt)("h2",{id:"gene-pair-tsv-file"},"Gene Pair TSV File"),(0,r.kt)("p",null,"Most of the data files in FusionCatcher are two-column TSV files containing the Ensembl gene IDs that are paired together."),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the 1000genomes.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000006210 ENSG00000102962\nENSG00000006652 ENSG00000181016\nENSG00000014138 ENSG00000149798\nENSG00000026297 ENSG00000071242\nENSG00000035499 ENSG00000155959\nENSG00000055211 ENSG00000131013\nENSG00000055332 ENSG00000179915\nENSG00000062485 ENSG00000257727\nENSG00000065978 ENSG00000166501\nENSG00000066044 ENSG00000104980\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"In Nirvana, we will only import a gene pair if both Ensembl gene IDs are recognized from either our GRCh37 or GRCh38 cache files."),(0,r.kt)("h2",{id:"gene-tsv-file"},"Gene TSV File"),(0,r.kt)("p",null,"Some of the data files are single-column files containing Ensembl gene IDs. 
This is commonly used in the data files representing oncogene data sources."),(0,r.kt)("h3",{id:"example-1"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the oncogenes_more.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000000938\nENSG00000003402\nENSG00000005469\nENSG00000005884\nENSG00000006128\nENSG00000006453\nENSG00000006468\nENSG00000007350\nENSG00000008294\nENSG00000008952\n")),(0,r.kt)("h3",{id:"parsing-1"},"Parsing"),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"FusionCatcher also uses creates custom Ensembl genes (e.g. ",(0,r.kt)("inlineCode",{parentName:"p"},"ENSG09000000002"),") to handle missing Ensembl genes. 
Nirvana will ignore these entries since we only include the gene IDs that are currently recognized by Nirvana."),(0,r.kt)("p",{parentName:"div"},"I suspect that these were originally RefSeq genes and if so, we can support those directly in Nirvana in the future."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sourceforge.net/projects/fusioncatcher/files/data"},"https://sourceforge.net/projects/fusioncatcher/files/data")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSON"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a1f68f47.490ade5f.js b/assets/js/a1f68f47.490ade5f.js deleted file mode 100644 index 3715b3b7..00000000 --- a/assets/js/a1f68f47.490ade5f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4836,2020],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>D});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=p(n),u=r,D=d["".concat(s,".").concat(u)]||d[u]||m[u]||i;return n?a.createElement(D,o(o({ref:t},c),{},{components:n})):a.createElement(D,o({ref:t},c))}));function D(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/splice-ai-json",id:"version-3.14/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/splice-ai-json.md",tags:[],version:"3.14",frontMatter:{}},s=[],p={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}d.isMDXComponent=!0},21583:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>s,toc:()=>p});var a=n(87462),r=(n(67294),n(3905)),i=n(54886);const o={title:"Splice AI"},l=void 0,s={unversionedId:"data-sources/splice-ai",id:"version-3.14/data-sources/splice-ai",title:"Splice AI",description:"Overview",source:"@site/versioned_docs/version-3.14/data-sources/splice-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/splice-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/splice-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/splice-ai.mdx",tags:[],version:"3.14",frontMatter:{title:"Splice AI"},sidebar:"version-3.14/docs",previous:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/revel"},next:{title:"Nirvana JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/file-formats/nirvana-json-file-format"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Filtering",id:"filtering",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:p},d="wrapper";function 
m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. ",(0,r.kt)("em",{parentName:"p"},"Cell"),", ",(0,r.kt)("strong",{parentName:"p"},"176")," (3) (2019), pp. 535-548 e24"))),(0,r.kt)("h2",{id:"vcf-file"},"VCF File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##fileformat=VCFv4.0\n##assembly=GRCh37/hg19\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n#CHROM POS ID REF ALT QUAL FILTER INFO\n10 92946 . C T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35\n10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1\n10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21\n10 92947 . A C . . 
SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34\n10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34\n10 92947 . A G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32\n')),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the VCF file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AG")," - \u0394 score (acceptor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AL")," - \u0394 score (acceptor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DG")," - \u0394 score (donor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DL")," - \u0394 score (donor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AG")," - \u0394 position (acceptor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AL")," - \u0394 position (acceptor loss) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DG")," - \u0394 position (donor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DL")," - \u0394 position (donor loss) relative to the variant position")),(0,r.kt)("p",null,"The Splice AI team suggests the following interpretation for the 
scores:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Range"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Confidence"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Pathogenicity"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0 \u2264 x < 0.1"),(0,r.kt)("td",{parentName:"tr",align:"left"},"low"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely benign")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0.1 \u2264 x \u2264 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"medium"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely pathogenic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"x > 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"high"),(0,r.kt)("td",{parentName:"tr",align:"left"},"pathogenic")))),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"filtering"},"Filtering"),(0,r.kt)("p",null,"Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value. I.e. observing low splice scores in intergenic regions. Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed."),(0,r.kt)("p",null,"As a result, Nirvana filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. 
For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/5u6ThOblecrh"},"https://basespace.illumina.com/s/5u6ThOblecrh")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a26ba82d.1fac3eec.js b/assets/js/a26ba82d.1fac3eec.js deleted file mode 100644 index 8c9e6e50..00000000 --- a/assets/js/a26ba82d.1fac3eec.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7706,2865],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>h});var o=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);t&&(o=o.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,o)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(o=0;o=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=o.createContext({}),p=function(e){var t=o.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=p(e.components);return o.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return o.createElement(o.Fragment,{},t)}},m=o.forwardRef((function(e,t){var n=e.components,a=e.mdxType,r=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=p(n),m=a,h=d["".concat(s,".").concat(m)]||d[m]||u[m]||r;return 
n?o.createElement(h,i(i({ref:t},c),{},{components:n})):o.createElement(h,i({ref:t},c))}));function h(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var r=n.length,i=new Array(r);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:a,i[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>d,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var o=n(87462),a=(n(67294),n(3905));const r={},i=void 0,l={unversionedId:"data-sources/phylop-json",id:"data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/phylop-json.md",tags:[],version:"current",frontMatter:{}},s=[],p={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,a.kt)(c,(0,o.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] \n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 
6.424")))))}d.isMDXComponent=!0},91702:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>p});var o=n(87462),a=(n(67294),n(3905)),r=n(84133);const i={title:"PhyloP"},l=void 0,s={unversionedId:"data-sources/phylop",id:"data-sources/phylop",title:"PhyloP",description:"Overview",source:"@site/docs/data-sources/phylop.mdx",sourceDirName:"data-sources",slug:"/data-sources/phylop",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/phylop.mdx",tags:[],version:"current",frontMatter:{title:"PhyloP"},sidebar:"docs",previous:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim"},next:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"WigFix File",id:"wigfix-file",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:p},d="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(d,(0,o.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("h2",{id:"overview"},"Overview"),(0,a.kt)("p",null,"PhyloP (phylogenetic p-values) conservation scores are obtained from the ","[PHAST package]"," (",(0,a.kt)("a",{parentName:"p",href:"http://compgen.bscb.cornell.edu/phast/"},"http://compgen.bscb.cornell.edu/phast/"),") for multiple alignments of vertebrate genomes to the human genome. 
For GRCh38, the multiple alignments are against 19 mammals and for GRCh37, it is against 45 vertebrate genomes."),(0,a.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,a.kt)("div",{parentName:"div",className:"admonition-heading"},(0,a.kt)("h5",{parentName:"div"},(0,a.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,a.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,a.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,a.kt)("div",{parentName:"div",className:"admonition-content"},(0,a.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. ",(0,a.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. (",(0,a.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,a.kt)("h2",{id:"wigfix-file"},"WigFix File"),(0,a.kt)("p",null,"The data is provided in WigFix files which is a text file that provides conservation scores for contiguous intervals in the following format:"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"fixedStep chrom=chr1 start=10918 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.064\nfixedStep chrom=chr1 start=34045 step=1\n0.111\n0.100\n0.111\n0.111\n0.100\n0.111\n0.111\n0.111\n0.100\n0.111\n-1.636\n")),(0,a.kt)("p",null,"We convert them to binary files with indexes for fast query. 
Note that these are scores for genomic positions and are reported only for SNVs."),(0,a.kt)("h2",{id:"download-url"},"Download URL"),(0,a.kt)("p",null,"GRCh37: ",(0,a.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/"},"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/")),(0,a.kt)("p",null,"GRCh38: ",(0,a.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/"},"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/")),(0,a.kt)("h2",{id:"json-output"},"JSON Output"),(0,a.kt)("p",null,"Unlike other supplemetary datasources, phyloP scores are reported in the variants section."),(0,a.kt)(r.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a26ba82d.97959d26.js b/assets/js/a26ba82d.97959d26.js new file mode 100644 index 00000000..4d5477ff --- /dev/null +++ b/assets/js/a26ba82d.97959d26.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7706,2865],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>h});var o=n(7294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);t&&(o=o.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,o)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(o=0;o=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=o.createContext({}),p=function(e){var t=o.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=p(e.components);return o.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return 
o.createElement(o.Fragment,{},t)}},m=o.forwardRef((function(e,t){var n=e.components,a=e.mdxType,r=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=p(n),m=a,h=d["".concat(s,".").concat(m)]||d[m]||u[m]||r;return n?o.createElement(h,i(i({ref:t},c),{},{components:n})):o.createElement(h,i({ref:t},c))}));function h(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var r=n.length,i=new Array(r);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:a,i[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>d,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var o=n(7462),a=(n(7294),n(3905));const r={},i=void 0,l={unversionedId:"data-sources/phylop-json",id:"data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/phylop-json.md",tags:[],version:"current",frontMatter:{}},s=[],p={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,a.kt)(c,(0,o.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] 
\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}d.isMDXComponent=!0},1702:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>p});var o=n(7462),a=(n(7294),n(3905)),r=n(4133);const i={title:"PhyloP"},l=void 0,s={unversionedId:"data-sources/phylop",id:"data-sources/phylop",title:"PhyloP",description:"Overview",source:"@site/docs/data-sources/phylop.mdx",sourceDirName:"data-sources",slug:"/data-sources/phylop",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/phylop.mdx",tags:[],version:"current",frontMatter:{title:"PhyloP"},sidebar:"docs",previous:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim"},next:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"WigFix File",id:"wigfix-file",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:p},d="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(d,(0,o.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("h2",{id:"overview"},"Overview"),(0,a.kt)("p",null,"PhyloP (phylogenetic p-values) conservation scores are obtained from the ","[PHAST package]"," 
(",(0,a.kt)("a",{parentName:"p",href:"http://compgen.bscb.cornell.edu/phast/"},"http://compgen.bscb.cornell.edu/phast/"),") for multiple alignments of vertebrate genomes to the human genome. For GRCh38, the multiple alignments are against 19 mammals and for GRCh37, it is against 45 vertebrate genomes."),(0,a.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,a.kt)("div",{parentName:"div",className:"admonition-heading"},(0,a.kt)("h5",{parentName:"div"},(0,a.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,a.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,a.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,a.kt)("div",{parentName:"div",className:"admonition-content"},(0,a.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. ",(0,a.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. 
(",(0,a.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,a.kt)("h2",{id:"wigfix-file"},"WigFix File"),(0,a.kt)("p",null,"The data is provided in WigFix files which is a text file that provides conservation scores for contiguous intervals in the following format:"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"fixedStep chrom=chr1 start=10918 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.064\nfixedStep chrom=chr1 start=34045 step=1\n0.111\n0.100\n0.111\n0.111\n0.100\n0.111\n0.111\n0.111\n0.100\n0.111\n-1.636\n")),(0,a.kt)("p",null,"We convert them to binary files with indexes for fast query. Note that these are scores for genomic positions and are reported only for SNVs."),(0,a.kt)("h2",{id:"download-url"},"Download URL"),(0,a.kt)("p",null,"GRCh37: ",(0,a.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/"},"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/")),(0,a.kt)("p",null,"GRCh38: ",(0,a.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/"},"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/")),(0,a.kt)("h2",{id:"json-output"},"JSON Output"),(0,a.kt)("p",null,"Unlike other supplemetary datasources, phyloP scores are reported in the variants section."),(0,a.kt)(r.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a2ab8500.883bad01.js b/assets/js/a2ab8500.883bad01.js deleted file mode 100644 index 7de48718..00000000 --- a/assets/js/a2ab8500.883bad01.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2865],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),i=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=i(e.components);return r.createElement(p.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,p=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=i(n),d=a,f=u["".concat(p,".").concat(d)]||u[d]||m[d]||o;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=d;var c={};for(var p in t)hasOwnProperty.call(t,p)&&(c[p]=t[p]);c.originalType=e,c[u]="string"==typeof e?e:a,l[1]=c;for(var i=2;i{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,c={unversionedId:"data-sources/phylop-json",id:"data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/phylop-json.md",tags:[],version:"current",frontMatter:{}},p=[],i={toc:p},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},i,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] \n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a2ab8500.df00540f.js b/assets/js/a2ab8500.df00540f.js new file mode 100644 index 00000000..6c14c0b8 --- /dev/null +++ b/assets/js/a2ab8500.df00540f.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2865],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(7294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),i=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=i(e.components);return r.createElement(p.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,p=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=i(n),d=a,f=u["".concat(p,".").concat(d)]||u[d]||m[d]||o;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=d;var c={};for(var p in t)hasOwnProperty.call(t,p)&&(c[p]=t[p]);c.originalType=e,c[u]="string"==typeof e?e:a,l[1]=c;for(var i=2;i{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>p});var r=n(7462),a=(n(7294),n(3905));const o={},l=void 0,c={unversionedId:"data-sources/phylop-json",id:"data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/phylop-json.md",tags:[],version:"current",frontMatter:{}},p=[],i={toc:p},s="wrapper";function 
u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},i,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] \n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a2f6cc85.dd8f2e1e.js b/assets/js/a2f6cc85.dd8f2e1e.js deleted file mode 100644 index e1f53a01..00000000 --- a/assets/js/a2f6cc85.dd8f2e1e.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3111,217],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>h});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof 
e?e(t):o(o({},t),e)),n},d=function(e){var t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),c=m(n),u=i,h=c["".concat(s,".").concat(u)]||c[u]||p[u]||r;return n?a.createElement(h,o(o({ref:t},d),{},{components:n})):a.createElement(h,o({ref:t},d))}));function h(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:i,o[1]=l;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>c,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},o=void 0,l={unversionedId:"data-sources/cosmic-json",id:"version-3.17/data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/cosmic-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],m={toc:s},d="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},' "cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary 
gland (submandibular)",\n "numSamples":1\n },\n {\n "name":"salivary gland (parotid)",\n "numSamples":1\n },\n {\n "name":"salivary gland (nasal cavity)",\n "numSamples":1\n },\n {\n "name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 19841262\n ]\n }\n ]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,i.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"tissue 
types")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Count")),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"name"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"description")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})))))}c.isMDXComponent=!0},44512:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>o,metadata:()=>s,toc:()=>m});var a=n(87462),i=(n(67294),n(3905)),r=n(31034);const o={title:"COSMIC"},l=void 
0,s={unversionedId:"data-sources/cosmic",id:"version-3.17/data-sources/cosmic",title:"COSMIC",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/cosmic.mdx",sourceDirName:"data-sources",slug:"/data-sources/cosmic",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/cosmic",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/cosmic.mdx",tags:[],version:"3.17",frontMatter:{title:"COSMIC"},sidebar:"version-3.17/docs",previous:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clinvar"},next:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/dbsnp"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Gene Fusions",id:"gene-fusions",children:[{value:"TSV File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4},{value:"Aggregation",id:"aggregation",children:[],level:4},{value:"Fixing the HGVS RNA Notation",id:"fixing-the-hgvs-rna-notation",children:[],level:4},{value:"Aggregating Histologies",id:"aggregating-histologies",children:[],level:4},{value:"Aggregating Sites",id:"aggregating-sites",children:[],level:4}],level:3},{value:"Known Issues",id:"known-issues",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2}],d={toc:m},c="wrapper";function p(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"COSMIC, the Catalogue of Somatic Mutations in Cancer, is the world's largest source of expert manually curated somatic mutation information relating to human cancers."),(0,i.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"John G Tate, Sally Bamford, Harry C Jubb, Zbyslaw Sondka, David M Beare, Nidhi Bindal, Harry Boutselakis, Charlotte G Cole, Celestino Creatore, Elisabeth Dawson, Peter Fish, Bhavana Harsha, Charlie Hathaway, Steve C Jupe, Chai Yin Kok, Kate Noble, Laura Ponting, Christopher C Ramshaw, Claire E Rye, Helen E Speedy, Ray Stefancsik, Sam L Thompson, Shicai Wang, Sari Ward, Peter J Campbell, Simon A Forbes. 
(2019) ",(0,i.kt)("a",{parentName:"p",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"COSMIC: the Catalogue Of Somatic Mutations In Cancer"),", ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", Volume 47, Issue D1"))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Licensed Content")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Commercial companies are required to ",(0,i.kt)("a",{parentName:"p",href:"https://cancer.sanger.ac.uk/cosmic/license"},"acquire a license from COSMIC"),". At the moment, this means that our COSMIC content is only available in Illumina's products and services, not in the open source distribution."),(0,i.kt)("p",{parentName:"div"},"Since many of you are academic users, we will enable a COSMIC login in our downloader later this year that will allow academic and commercial organizations (with a license) access our COSMIC data sources. "))),(0,i.kt)("h2",{id:"gene-fusions"},"Gene Fusions"),(0,i.kt)("p",null,"Gene fusions are manually curated from peer reviewed publications by expert COSMIC curators. A comprehensive literature curation is completed for each fusion pair when it is released in the database. 
Currently COSMIC includes information on fusions involved in solid tumours and leukaemias."),(0,i.kt)("h3",{id:"tsv-file"},"TSV File"),(0,i.kt)("h4",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"SAMPLE_ID SAMPLE_NAME PRIMARY_SITE SITE_SUBTYPE_1 SITE_SUBTYPE_2 SITE_SUBTYPE_3 PRIMARY_HISTOLOGY HISTOLOGY_SUBTYPE_1 HISTOLOGY_SUBTYPE_2 HISTOLOGY_SUBTYPE_3 FUSION_ID TRANSLOCATION_NAME 5'_CHROMOSOME 5'_STRAND 5'_GENE_ID 5'_GENE_NAME 5'_LAST_OBSERVED_EXON 5'_GENOME_START_FROM 5'_GENOME_START_TO 5'_GENOME_STOP_FROM 5'_GENOME_STOP_TO 3'_CHROMOSOME 3'_STRAND 3'_GENE_ID 3'_GENE_NAME 3'_FIRST_OBSERVED_EXON 3'_GENOME_START_FROM 3'_GENOME_START_TO 3'_GENOME_STOP_FROM 3'_GENOME_STOP_TO FUSION_TYPE PUBMED_PMID\n749711 HCC1187 breast NS NS NS carcinoma ductal_carcinoma NS NS 665 ENST00000360863.10(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452 8 - 197199 RGS22 22 99981937 99981937 100106116 100106116 1 + 212470 SYCP1_ENST00000369518 24 114944339 114944339 114995367 114995367 Inferred Breakpoint 20033038\n")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the TSV file, we're mainly interested in the following columns:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"SAMPLE_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_SITE")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PRIMARY_HISTOLOGY")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"HISTOLOGY_SUBTYPE_1")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"FUSION_ID")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"TRANSLOCATION_NAME")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"PUBMED_PMID"))),(0,i.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"For all the histologies and sites, we replace all the underlines with spaces. ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary_gland")," would become ",(0,i.kt)("inlineCode",{parentName:"p"},"salivary gland"),"."))),(0,i.kt)("h4",{id:"aggregation"},"Aggregation"),(0,i.kt)("p",null,"To create the gene fusion entries in Nirvana, we perform the following on each row in the TSV file:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Group all entries by FUSION_ID"),(0,i.kt)("li",{parentName:"ul"},"Using all the entries related to this FUSION_ID:",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"Collect all the PubMed IDs"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of observed sample IDs"),(0,i.kt)("li",{parentName:"ul"},"Grab the HGVS r. 
notation (should not change throughout the FUSION_ID)"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each histology"),(0,i.kt)("li",{parentName:"ul"},"Tally the number of samples observed for each site"))),(0,i.kt)("li",{parentName:"ul"},"Extract the transcript IDs from the HGVS notation and lookup the associated gene symbols")),(0,i.kt)("h4",{id:"fixing-the-hgvs-rna-notation"},"Fixing the HGVS RNA Notation"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"ENST00000360863.6(RGS22):r.1_3555_ENST00000369518.1(SYCP1):r.2100_3452\n")),(0,i.kt)("p",null,"There are some issues with the HGVS RNA notation:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"The two transcripts should be linked by a double colon ",(0,i.kt)("inlineCode",{parentName:"li"},"::"),"."),(0,i.kt)("li",{parentName:"ul"},"For coding transcripts, HGVS numbering should use CDS coordinates. Right now COSMIC is using cDNA coordinates for all their fusion"),(0,i.kt)("li",{parentName:"ul"},"If only the breakpoint is truly known, the recommendation is to use ",(0,i.kt)("inlineCode",{parentName:"li"},"?")," marks")),(0,i.kt)("p",null,"We chose to only update the linkage between each transcript using double colons ",(0,i.kt)("inlineCode",{parentName:"p"},"::"),". While we could have recalculated the HGVS notation using the supplied breakpoints, we chose not to because the resulting notation would be quite different from the original material. This would potentially lead to some confusion."),(0,i.kt)("h4",{id:"aggregating-histologies"},"Aggregating Histologies"),(0,i.kt)("p",null,"For histologies we want to capture the most specific description available. In the example above, we saw that the primary histology was ",(0,i.kt)("inlineCode",{parentName:"p"},"carcinoma"),", but the subtype was ",(0,i.kt)("inlineCode",{parentName:"p"},"ductal carcinoma"),". 
In this case we would use the subtype for the annotation."),(0,i.kt)("p",null,"COSMIC uses ",(0,i.kt)("inlineCode",{parentName:"p"},"NS")," to show that a value is empty. If the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"NS"),", we will use the primary histology instead."),(0,i.kt)("h4",{id:"aggregating-sites"},"Aggregating Sites"),(0,i.kt)("p",null,"For sites, we observe that the subtype provides additional description but is still dependent on the primary site value. For example, the primary site might be ",(0,i.kt)("inlineCode",{parentName:"p"},"skin"),", but the subtype is ",(0,i.kt)("inlineCode",{parentName:"p"},"foot"),". Therefore, we will combine the values in the following manner: ",(0,i.kt)("inlineCode",{parentName:"p"},"skin (foot)"),"."),(0,i.kt)("h3",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"There are some issues with the HGVS RNA notation:"),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"The two transcripts should be linked by a double colon ",(0,i.kt)("inlineCode",{parentName:"li"},"::"),". We fixed this aspect in Nirvana."),(0,i.kt)("li",{parentName:"ul"},"For coding transcripts, HGVS numbering should use CDS coordinates. 
Right now COSMIC is using cDNA coordinates for all their fusions.")))),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v94/CosmicFusionExport.tsv.gz"},"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh37/cosmic/v94/CosmicFusionExport.tsv.gz")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v94/CosmicFusionExport.tsv.gz"},"https://cancer.sanger.ac.uk/cosmic/file_download/GRCh38/cosmic/v94/CosmicFusionExport.tsv.gz"))),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a3dc7984.dd8761ff.js b/assets/js/a3dc7984.dd8761ff.js deleted file mode 100644 index 17663a37..00000000 --- a/assets/js/a3dc7984.dd8761ff.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8836,6819],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var 
t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=d(n),u=r,v=p["".concat(s,".").concat(u)]||p[u]||m[u]||i;return n?a.createElement(v,o(o({ref:t},c),{},{components:n})):a.createElement(v,o({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/revel-json",id:"version-3.21/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/revel-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],d={toc:s},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n 
"score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}p.isMDXComponent=!0},80183:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),i=n(27061);const o={title:"REVEL"},l=void 0,s={unversionedId:"data-sources/revel",id:"version-3.21/data-sources/revel",title:"REVEL",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/revel.mdx",sourceDirName:"data-sources",slug:"/data-sources/revel",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/revel",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/revel.mdx",tags:[],version:"3.21",frontMatter:{title:"REVEL"},sidebar:"docs",previous:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/primate-ai"},next:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/splice-ai"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"CSV File",id:"csv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},p="wrapper";function 
m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. ",(0,r.kt)("em",{parentName:"p"},"The American Journal of Human Genetics")," ",(0,r.kt)("strong",{parentName:"p"},"99"),", 877-885 (2016). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1016/j.ajhg.2016.08.016"},"https://doi.org/10.1016/j.ajhg.2016.08.016")))),(0,r.kt)("h2",{id:"csv-file"},"CSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL\n1,35142,35142,G,A,T,M,0.027\n1,35142,35142,G,C,T,R,0.035\n1,35142,35142,G,T,T,K,0.043\n1,35143,35143,T,A,T,S,0.018\n1,35143,35143,T,C,T,A,0.034\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"hg19_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"grch38_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"REVEL"))),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Since the input file contains positions 
for both GRCh37 and GRCh38, we split it into two ",(0,r.kt)("strong",{parentName:"p"},"TSV")," files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file."))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Conflicting Scores")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When there are multiple scores available for the same variant (i.e. 
the same position with the same alternative allele), we pick the highest score."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sites.google.com/site/revelgenomics/downloads"},"https://sites.google.com/site/revelgenomics/downloads")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a5e136a1.3a72f7d4.js b/assets/js/a5e136a1.3a72f7d4.js deleted file mode 100644 index def72d09..00000000 --- a/assets/js/a5e136a1.3a72f7d4.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8111],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var c=a.createContext({}),s=function(e){var t=a.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=s(e.components);return a.createElement(c.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,c=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),m=s(n),u=r,v=m["".concat(c,".").concat(u)]||m[u]||d[u]||l;return n?a.createElement(v,i(i({ref:t},p),{},{components:n})):a.createElement(v,i({ref:t},p))}));function 
v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,i=new Array(l);i[0]=u;var o={};for(var c in t)hasOwnProperty.call(t,c)&&(o[c]=t[c]);o.originalType=e,o[m]="string"==typeof e?e:r,i[1]=o;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>c});var a=n(87462),r=(n(67294),n(3905));const l={title:"Variant IDs"},i=void 0,o={unversionedId:"core-functionality/variant-ids",id:"core-functionality/variant-ids",title:"Variant IDs",description:"Overview",source:"@site/docs/core-functionality/variant-ids.md",sourceDirName:"core-functionality",slug:"/core-functionality/variant-ids",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/core-functionality/variant-ids.md",tags:[],version:"current",frontMatter:{title:"Variant IDs"},sidebar:"docs",previous:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions"},next:{title:"Jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/utilities/jasix"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF Examples",id:"vcf-examples",children:[],level:3},{value:"Format",id:"format",children:[],level:3},{value:"VID Examples",id:"vid-examples",children:[],level:3}],level:2},{value:"Translocation Breakends",id:"translocation-breakends",children:[{value:"VCF Example",id:"vcf-example",children:[],level:3},{value:"Format",id:"format-1",children:[],level:3},{value:"VID Example",id:"vid-example",children:[],level:3}],level:2},{value:"All Other Structural Variants",id:"all-other-structural-variants",children:[{value:"VCF Examples",id:"vcf-examples-1",children:[],level:3},{value:"Format",id:"format-2",children:[],level:3},{value:"VID 
Examples",id:"vid-examples-1",children:[],level:3}],level:2}],s={toc:c},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Many downstream tools use a variant identifier to store annotation results. We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute."),(0,r.kt)("p",null,"The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. Our VID scheme attempts to fill that gap."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Conventions")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("ul",{parentName:"div"},(0,r.kt)("li",{parentName:"ul"},"all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)"),(0,r.kt)("li",{parentName:"ul"},"for a reference variant (i.e. no alt allele), replace the period (.) with the reference base"),(0,r.kt)("li",{parentName:"ul"},"padding bases are used, neither the reference nor alternate allele can be empty"),(0,r.kt)("li",{parentName:"ul"},"some large variant callers lazily output N for the reference allele. 
If this is the case, replace it with the true reference base")))),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-examples"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 66507 . T A 184.45 PASS .\nchr1 66521 . T TATATA 144.53 PASS .\nchr1 66572 . GTA G,GTACTATATATTATA 45.45 PASS .\n")),(0,r.kt)("h3",{id:"format"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-examples"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-66507-T-A"),(0,r.kt)("li",{parentName:"ul"},"1-66521-T-TATATA"),(0,r.kt)("li",{parentName:"ul"},"1-66572-GTA-G"),(0,r.kt)("li",{parentName:"ul"},"1-66572-G-GTACTATATATTA")),(0,r.kt)("h2",{id:"translocation-breakends"},"Translocation Breakends"),(0,r.kt)("h3",{id:"vcf-example"},"VCF Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 2617277 . A AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[ . PASS SVTYPE=BND\n")),(0,r.kt)("h3",{id:"format-1"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-example"},"VID Example"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[")),(0,r.kt)("h2",{id:"all-other-structural-variants"},"All Other Structural Variants"),(0,r.kt)("h3",{id:"vcf-examples-1"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 1000 . G . PASS END=3001000;SVTYPE=ROH\nchr1 1350082 . G . 
PASS END=1351320;SVTYPE=DEL\nchr1 1477854 . C . PASS END=1477984;SVTYPE=DUP\nchr1 1477968 . T . PASS END=1477968;SVTYPE=INS\nchr1 1715898 . N . PASS SVTYPE=CNV;END=1750149\nchr1 2650426 . N . PASS SVTYPE=CNV;END=2653074\nchr2 321682 . T . PASS SVTYPE=INV;END=421681\nchr20 2633403 . G . PASS END=2633421\n")),(0,r.kt)("h3",{id:"format-2"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"end position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"SVTYPE")),(0,r.kt)("h3",{id:"vid-examples-1"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-1000-3001000-G-","<","ROH",">","-ROH"),(0,r.kt)("li",{parentName:"ul"},"1-1350082-1351320-G-","<","DEL",">","-DEL"),(0,r.kt)("li",{parentName:"ul"},"1-1477854-1477984-C-","<","DUP:TANDEM",">","-DUP"),(0,r.kt)("li",{parentName:"ul"},"1-1477968-1477968-T-","<","INS",">","-INS"),(0,r.kt)("li",{parentName:"ul"},"1-1715898-1750149-A-","<","DUP",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(replace the N with A)")),(0,r.kt)("li",{parentName:"ul"},"1-2650426-2653074-N-","<","DEL",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(keep the N)")),(0,r.kt)("li",{parentName:"ul"},"2-321682-421681-T-","<","INV",">","-INV"),(0,r.kt)("li",{parentName:"ul"},"20-2633403-2633421-G-","<","STR2",">","-STR")))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a5e136a1.c7e5c6d7.js b/assets/js/a5e136a1.c7e5c6d7.js new file mode 100644 index 00000000..e1f1a983 --- /dev/null +++ b/assets/js/a5e136a1.c7e5c6d7.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8111],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(7294);function r(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var c=a.createContext({}),s=function(e){var t=a.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=s(e.components);return a.createElement(c.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,c=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),m=s(n),u=r,v=m["".concat(c,".").concat(u)]||m[u]||d[u]||l;return n?a.createElement(v,i(i({ref:t},p),{},{components:n})):a.createElement(v,i({ref:t},p))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,i=new Array(l);i[0]=u;var o={};for(var c in t)hasOwnProperty.call(t,c)&&(o[c]=t[c]);o.originalType=e,o[m]="string"==typeof e?e:r,i[1]=o;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>c});var a=n(7462),r=(n(7294),n(3905));const l={title:"Variant IDs"},i=void 0,o={unversionedId:"core-functionality/variant-ids",id:"core-functionality/variant-ids",title:"Variant 
IDs",description:"Overview",source:"@site/docs/core-functionality/variant-ids.md",sourceDirName:"core-functionality",slug:"/core-functionality/variant-ids",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/core-functionality/variant-ids.md",tags:[],version:"current",frontMatter:{title:"Variant IDs"},sidebar:"docs",previous:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions"},next:{title:"Jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/utilities/jasix"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF Examples",id:"vcf-examples",children:[],level:3},{value:"Format",id:"format",children:[],level:3},{value:"VID Examples",id:"vid-examples",children:[],level:3}],level:2},{value:"Translocation Breakends",id:"translocation-breakends",children:[{value:"VCF Example",id:"vcf-example",children:[],level:3},{value:"Format",id:"format-1",children:[],level:3},{value:"VID Example",id:"vid-example",children:[],level:3}],level:2},{value:"All Other Structural Variants",id:"all-other-structural-variants",children:[{value:"VCF Examples",id:"vcf-examples-1",children:[],level:3},{value:"Format",id:"format-2",children:[],level:3},{value:"VID Examples",id:"vid-examples-1",children:[],level:3}],level:2}],s={toc:c},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Many downstream tools use a variant identifier to store annotation results. 
We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute."),(0,r.kt)("p",null,"The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. Our VID scheme attempts to fill that gap."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Conventions")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("ul",{parentName:"div"},(0,r.kt)("li",{parentName:"ul"},"all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)"),(0,r.kt)("li",{parentName:"ul"},"for a reference variant (i.e. no alt allele), replace the period (.) with the reference base"),(0,r.kt)("li",{parentName:"ul"},"padding bases are used, neither the reference nor alternate allele can be empty"),(0,r.kt)("li",{parentName:"ul"},"some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base")))),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-examples"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 66507 . T A 184.45 PASS .\nchr1 66521 . T TATATA 144.53 PASS .\nchr1 66572 . 
GTA G,GTACTATATATTATA 45.45 PASS .\n")),(0,r.kt)("h3",{id:"format"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-examples"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-66507-T-A"),(0,r.kt)("li",{parentName:"ul"},"1-66521-T-TATATA"),(0,r.kt)("li",{parentName:"ul"},"1-66572-GTA-G"),(0,r.kt)("li",{parentName:"ul"},"1-66572-G-GTACTATATATTA")),(0,r.kt)("h2",{id:"translocation-breakends"},"Translocation Breakends"),(0,r.kt)("h3",{id:"vcf-example"},"VCF Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 2617277 . A AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[ . PASS SVTYPE=BND\n")),(0,r.kt)("h3",{id:"format-1"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-example"},"VID Example"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[")),(0,r.kt)("h2",{id:"all-other-structural-variants"},"All Other Structural Variants"),(0,r.kt)("h3",{id:"vcf-examples-1"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 1000 . G . PASS END=3001000;SVTYPE=ROH\nchr1 1350082 . G . PASS END=1351320;SVTYPE=DEL\nchr1 1477854 . C . PASS END=1477984;SVTYPE=DUP\nchr1 1477968 . T . PASS END=1477968;SVTYPE=INS\nchr1 1715898 . N . PASS SVTYPE=CNV;END=1750149\nchr1 2650426 . N . PASS SVTYPE=CNV;END=2653074\nchr2 321682 . T . PASS SVTYPE=INV;END=421681\nchr20 2633403 . G . 
PASS END=2633421\n")),(0,r.kt)("h3",{id:"format-2"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"end position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"SVTYPE")),(0,r.kt)("h3",{id:"vid-examples-1"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-1000-3001000-G-","<","ROH",">","-ROH"),(0,r.kt)("li",{parentName:"ul"},"1-1350082-1351320-G-","<","DEL",">","-DEL"),(0,r.kt)("li",{parentName:"ul"},"1-1477854-1477984-C-","<","DUP:TANDEM",">","-DUP"),(0,r.kt)("li",{parentName:"ul"},"1-1477968-1477968-T-","<","INS",">","-INS"),(0,r.kt)("li",{parentName:"ul"},"1-1715898-1750149-A-","<","DUP",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(replace the N with A)")),(0,r.kt)("li",{parentName:"ul"},"1-2650426-2653074-N-","<","DEL",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(keep the N)")),(0,r.kt)("li",{parentName:"ul"},"2-321682-421681-T-","<","INV",">","-INV"),(0,r.kt)("li",{parentName:"ul"},"20-2633403-2633421-G-","<","STR2",">","-STR")))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a7cb00a1.230487e4.js b/assets/js/a7cb00a1.230487e4.js deleted file mode 100644 index b255a99b..00000000 --- a/assets/js/a7cb00a1.230487e4.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4669],{95077:e=>{e.exports=JSON.parse('{"pluginId":"default","version":"3.14","label":"3.14","banner":"unmaintained","badge":true,"className":"docs-version-3.14","isLast":false,"docsSidebars":{"version-3.14/docs":[{"collapsed":true,"type":"category","label":"Introduction","items":[{"type":"link","label":"Introduction","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/","docId":"introduction/introduction"},{"type":"link","label":"Dependencies","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/dependencies","docId":"introduction/dependencies"},{"type":"link","label":"Getting Started","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/getting-started","docId":"introduction/getting-started"},{"type":"link","label":"Annotating COVID-19","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/covid19","docId":"introduction/covid19"}],"collapsible":true},{"collapsed":true,"type":"category","label":"Data Sources","items":[{"type":"link","label":"1000 Genomes","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/1000Genomes","docId":"data-sources/1000Genomes"},{"type":"link","label":"ClinVar","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/clinvar","docId":"data-sources/clinvar"},{"type":"link","label":"dbSNP","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/dbsnp","docId":"data-sources/dbsnp"},{"type":"link","label":"gnomAD","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/gnomad","docId":"data-sources/gnomad"},{"type":"link","label":"Mitochondrial 
Heteroplasmy","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mito-heteroplasmy","docId":"data-sources/mito-heteroplasmy"},{"type":"link","label":"MITOMAP","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/mitomap","docId":"data-sources/mitomap"},{"type":"link","label":"OMIM","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/omim","docId":"data-sources/omim"},{"type":"link","label":"Primate AI","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/primate-ai","docId":"data-sources/primate-ai"},{"type":"link","label":"PhyloP","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/phylop","docId":"data-sources/phylop"},{"type":"link","label":"REVEL","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/revel","docId":"data-sources/revel"},{"type":"link","label":"Splice AI","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/splice-ai","docId":"data-sources/splice-ai"}],"collapsible":true},{"collapsed":true,"type":"category","label":"File Formats","items":[{"type":"link","label":"Nirvana JSON File Format","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/file-formats/nirvana-json-file-format","docId":"file-formats/nirvana-json-file-format"},{"type":"link","label":"Custom Annotations","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/file-formats/custom-annotations","docId":"file-formats/custom-annotations"}],"collapsible":true},{"collapsed":true,"type":"category","label":"Core Functionality","items":[{"type":"link","label":"Variant IDs","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/core-functionality/variant-ids","docId":"core-functionality/variant-ids"},{"type":"link","label":"Gene Fusion 
Detection","href":"/IlluminaConnectedAnnotationsDocumentation/3.14/core-functionality/gene-fusions","docId":"core-functionality/gene-fusions"}],"collapsible":true}]},"docs":{"core-functionality/gene-fusions":{"id":"core-functionality/gene-fusions","title":"Gene Fusion Detection","description":"Overview","sidebar":"version-3.14/docs"},"core-functionality/variant-ids":{"id":"core-functionality/variant-ids","title":"Variant IDs","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/1000Genomes":{"id":"data-sources/1000Genomes","title":"1000 Genomes","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/1000Genomes-snv-json":{"id":"data-sources/1000Genomes-snv-json","title":"1000Genomes-snv-json","description":"| Field | Type | Notes |"},"data-sources/1000Genomes-sv-json":{"id":"data-sources/1000Genomes-sv-json","title":"1000Genomes-sv-json","description":"| Field | Type | Notes |"},"data-sources/clinvar":{"id":"data-sources/clinvar","title":"ClinVar","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/clinvar-json":{"id":"data-sources/clinvar-json","title":"clinvar-json","description":"| Field | Type | Notes |"},"data-sources/dbsnp":{"id":"data-sources/dbsnp","title":"dbSNP","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/dbsnp-json":{"id":"data-sources/dbsnp-json","title":"dbsnp-json","description":"| Field | Type | Notes |"},"data-sources/gnomad":{"id":"data-sources/gnomad","title":"gnomAD","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/gnomad-lof-json":{"id":"data-sources/gnomad-lof-json","title":"gnomad-lof-json","description":"| Field | Type | Notes |"},"data-sources/gnomad-small-variants-json":{"id":"data-sources/gnomad-small-variants-json","title":"gnomad-small-variants-json","description":"| Field | Type | Notes |"},"data-sources/mito-heteroplasmy":{"id":"data-sources/mito-heteroplasmy","title":"Mitochondrial 
Heteroplasmy","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/mitomap":{"id":"data-sources/mitomap","title":"MITOMAP","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/mitomap-small-variants-json":{"id":"data-sources/mitomap-small-variants-json","title":"mitomap-small-variants-json","description":"| Field | Type | Notes |"},"data-sources/mitomap-structural-variants-json":{"id":"data-sources/mitomap-structural-variants-json","title":"mitomap-structural-variants-json","description":"| Field | Type | Notes |"},"data-sources/omim":{"id":"data-sources/omim","title":"OMIM","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/omim-json":{"id":"data-sources/omim-json","title":"omim-json","description":"| Field | Type | Notes |"},"data-sources/phylop":{"id":"data-sources/phylop","title":"PhyloP","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/phylop-json":{"id":"data-sources/phylop-json","title":"phylop-json","description":"| Field | Type | Notes |"},"data-sources/primate-ai":{"id":"data-sources/primate-ai","title":"Primate AI","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/primate-ai-json":{"id":"data-sources/primate-ai-json","title":"primate-ai-json","description":"| Field | Type | Notes |"},"data-sources/revel":{"id":"data-sources/revel","title":"REVEL","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/revel-json":{"id":"data-sources/revel-json","title":"revel-json","description":"| Field | Type | Notes |"},"data-sources/splice-ai":{"id":"data-sources/splice-ai","title":"Splice AI","description":"Overview","sidebar":"version-3.14/docs"},"data-sources/splice-ai-json":{"id":"data-sources/splice-ai-json","title":"splice-ai-json","description":"| Field | Type | Notes |"},"file-formats/custom-annotations":{"id":"file-formats/custom-annotations","title":"Custom 
Annotations","description":"Overview","sidebar":"version-3.14/docs"},"file-formats/nirvana-json-file-format":{"id":"file-formats/nirvana-json-file-format","title":"Nirvana JSON File Format","description":"Overview","sidebar":"version-3.14/docs"},"introduction/covid19":{"id":"introduction/covid19","title":"Annotating COVID-19","description":"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.","sidebar":"version-3.14/docs"},"introduction/dependencies":{"id":"introduction/dependencies","title":"Dependencies","description":"All of the following dependencies have been included in this repository.","sidebar":"version-3.14/docs"},"introduction/getting-started":{"id":"introduction/getting-started","title":"Getting Started","description":"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.","sidebar":"version-3.14/docs"},"introduction/introduction":{"id":"introduction/introduction","title":"Introduction","description":"Clinical-grade variant annotation","sidebar":"version-3.14/docs"}}}')}}]); \ No newline at end of file diff --git a/assets/js/a7dbeff9.2cfc9501.js b/assets/js/a7dbeff9.2cfc9501.js deleted file mode 100644 index 83c203f4..00000000 --- a/assets/js/a7dbeff9.2cfc9501.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9111,1212],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>D});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=s(n),u=r,D=d["".concat(p,".").concat(u)]||d[u]||m[u]||i;return n?a.createElement(D,o(o({ref:t},c),{},{components:n})):a.createElement(D,o({ref:t},c))}));function D(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new 
Array(i);o[0]=u;var l={};for(var p in t)hasOwnProperty.call(t,p)&&(l[p]=t[p]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/splice-ai-json",id:"version-3.16/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/splice-ai-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],s={toc:p},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene 
symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")))))}d.isMDXComponent=!0},13571:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>p,toc:()=>s});var a=n(87462),r=(n(67294),n(3905)),i=n(94791);const o={title:"Splice AI"},l=void 0,p={unversionedId:"data-sources/splice-ai",id:"version-3.16/data-sources/splice-ai",title:"Splice AI",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/splice-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/splice-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/splice-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/splice-ai.mdx",tags:[],version:"3.16",frontMatter:{title:"Splice AI"},sidebar:"version-3.16/docs",previous:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/revel"},next:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/topmed"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Filtering",id:"filtering",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:s},d="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence."),(0,r.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. ",(0,r.kt)("em",{parentName:"p"},"Cell"),", ",(0,r.kt)("strong",{parentName:"p"},"176")," (3) (2019), pp. 535-548 e24"))),(0,r.kt)("h2",{id:"vcf-file"},"VCF File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##fileformat=VCFv4.0\n##assembly=GRCh37/hg19\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n#CHROM POS ID REF ALT QUAL FILTER INFO\n10 92946 . C T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35\n10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1\n10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21\n10 92947 . A C . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34\n10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34\n10 92947 . A G . . 
SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32\n')),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the VCF file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AG")," - \u0394 score (acceptor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AL")," - \u0394 score (acceptor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DG")," - \u0394 score (donor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DL")," - \u0394 score (donor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AG")," - \u0394 position (acceptor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AL")," - \u0394 position (acceptor loss) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DG")," - \u0394 position (donor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DL")," - \u0394 position (donor loss) relative to the variant position")),(0,r.kt)("p",null,"The Splice AI team suggests the following interpretation for the scores:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Range"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Confidence"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Pathogenicity"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0 \u2264 x < 0.1"),(0,r.kt)("td",{parentName:"tr",align:"left"},"low"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely 
benign")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0.1 \u2264 x \u2264 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"medium"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely pathogenic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"x > 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"high"),(0,r.kt)("td",{parentName:"tr",align:"left"},"pathogenic")))),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"filtering"},"Filtering"),(0,r.kt)("p",null,"Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value. I.e. observing low splice scores in intergenic regions. Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed."),(0,r.kt)("p",null,"As a result, Nirvana filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. 
For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/5u6ThOblecrh"},"https://basespace.illumina.com/s/5u6ThOblecrh")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/422d1fa8.0aaa1497.js b/assets/js/a8504dcf.703a4eba.js similarity index 66% rename from assets/js/422d1fa8.0aaa1497.js rename to assets/js/a8504dcf.703a4eba.js index eed4d14d..08fddf2e 100644 --- a/assets/js/422d1fa8.0aaa1497.js +++ b/assets/js/a8504dcf.703a4eba.js @@ -1 +1 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[833],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=r.createContext({}),l=function(e){var t=r.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(s.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var 
n=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(s,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,i(i({ref:t},p),{},{components:n})):r.createElement(f,i({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=m;var c={};for(var s in t)hasOwnProperty.call(t,s)&&(c[s]=t[s]);c.originalType=e,c[u]="string"==typeof e?e:a,i[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.21/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],l={toc:s},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,a.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}u.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1633],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(7294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=r.createContext({}),l=function(e){var t=r.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(s.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var 
n=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),d=a,f=u["".concat(s,".").concat(d)]||u[d]||m[d]||o;return n?r.createElement(f,i(i({ref:t},p),{},{components:n})):r.createElement(f,i({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=d;var c={};for(var s in t)hasOwnProperty.call(t,s)&&(c[s]=t[s]);c.originalType=e,c[u]="string"==typeof e?e:a,i[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>s});var r=n(7462),a=(n(7294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/amino-acid-conservation-json",id:"data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/amino-acid-conservation-json.md",tags:[],version:"current",frontMatter:{}},s=[],l={toc:s},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,a.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a8504dcf.c732e17f.js b/assets/js/a8504dcf.c732e17f.js deleted file mode 100644 index 94705950..00000000 --- a/assets/js/a8504dcf.c732e17f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1633],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=r.createContext({}),l=function(e){var t=r.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},p=function(e){var t=l(e.components);return 
r.createElement(s.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),d=a,f=u["".concat(s,".").concat(d)]||u[d]||m[d]||o;return n?r.createElement(f,i(i({ref:t},p),{},{components:n})):r.createElement(f,i({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=d;var c={};for(var s in t)hasOwnProperty.call(t,s)&&(c[s]=t[s]);c.originalType=e,c[u]="string"==typeof e?e:a,i[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/amino-acid-conservation-json",id:"data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/amino-acid-conservation-json.md",tags:[],version:"current",frontMatter:{}},s=[],l={toc:s},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,a.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,a.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/263e87b6.399f3f87.js b/assets/js/a8da062f.41457be8.js similarity index 59% rename from assets/js/263e87b6.399f3f87.js rename to assets/js/a8da062f.41457be8.js index ce06a2e7..ab57e3a1 100644 --- a/assets/js/263e87b6.399f3f87.js +++ b/assets/js/a8da062f.41457be8.js @@ -1 +1 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[525],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function s(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),l=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof 
e?e(t):s(s({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(i,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,s(s({ref:t},p),{},{components:n})):r.createElement(f,s({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,s=new Array(o);s[0]=m;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:a,s[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const o={},s=void 0,c={unversionedId:"data-sources/dbsnp-json",id:"version-3.21/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/dbsnp-json.md",tags:[],version:"3.21",frontMatter:{}},i=[],l={toc:i},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n 
"rs1042821"\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}u.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2630],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(7294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function c(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),l=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):c(c({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(i,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,c(c({ref:t},p),{},{components:n})):r.createElement(f,c({ref:t},p))}));function f(e,t){var 
n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,c=new Array(o);c[0]=m;var s={};for(var i in t)hasOwnProperty.call(t,i)&&(s[i]=t[i]);s.originalType=e,s[u]="string"==typeof e?e:a,c[1]=s;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>c,default:()=>u,frontMatter:()=>o,metadata:()=>s,toc:()=>i});var r=n(7462),a=(n(7294),n(3905));const o={},c=void 0,s={unversionedId:"data-sources/dbsnp-json",id:"data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dbsnp-json.md",tags:[],version:"current",frontMatter:{}},i=[],l={toc:i},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a8da062f.a410c716.js b/assets/js/a8da062f.a410c716.js deleted file mode 100644 index 621d6853..00000000 --- a/assets/js/a8da062f.a410c716.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2630],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function c(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),l=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):c(c({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(i,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,c(c({ref:t},p),{},{components:n})):r.createElement(f,c({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,c=new Array(o);c[0]=m;var s={};for(var i in t)hasOwnProperty.call(t,i)&&(s[i]=t[i]);s.originalType=e,s[u]="string"==typeof e?e:a,c[1]=s;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>c,default:()=>u,frontMatter:()=>o,metadata:()=>s,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const o={},c=void 0,s={unversionedId:"data-sources/dbsnp-json",id:"data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dbsnp-json.md",tags:[],version:"current",frontMatter:{}},i=[],l={toc:i},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a98c0ad1.768a1314.js b/assets/js/a98c0ad1.768a1314.js deleted file mode 100644 index e8499dd0..00000000 --- a/assets/js/a98c0ad1.768a1314.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3103],{3905:(e,t,r)=>{r.d(t,{Zo:()=>p,kt:()=>f});var n=r(67294);function a(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function o(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function c(e){for(var t=1;t=0||(a[r]=e[r]);return a}(e,t);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(a[r]=e[r])}return a}var i=n.createContext({}),s=function(e){var t=n.useContext(i),r=t;return e&&(r="function"==typeof e?e(t):c(c({},t),e)),r},p=function(e){var t=s(e.components);return n.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var r=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=s(r),d=a,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||o;return r?n.createElement(f,c(c({ref:t},p),{},{components:r})):n.createElement(f,c({ref:t},p))}));function f(e,t){var r=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=r.length,c=new Array(o);c[0]=d;var l={};for(var i in t)hasOwnProperty.call(t,i)&&(l[i]=t[i]);l.originalType=e,l[u]="string"==typeof e?e:a,c[1]=l;for(var s=2;s{r.r(t),r.d(t,{contentTitle:()=>c,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>i});var n=r(87462),a=(r(67294),r(3905));const o={},c=void 0,l={unversionedId:"data-sources/revel-json",id:"version-3.16/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/revel-json.md",tags:[],version:"3.16",frontMatter:{}},i=[],s={toc:i},p="wrapper";function u(e){let{components:t,...r}=e;return(0,a.kt)(p,(0,n.Z)({},s,r,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n 
"score":0.027\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"score"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a9d96153.eb04ab58.js b/assets/js/a9d96153.eb04ab58.js deleted file mode 100644 index bb4e2dc8..00000000 --- a/assets/js/a9d96153.eb04ab58.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[137,8808,2520,4005],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>g});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var 
n=e.components,i=e.mdxType,l=e.originalType,s=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),p=c(n),m=i,g=p["".concat(s,".").concat(m)]||p[m]||u[m]||l;return n?a.createElement(g,r(r({ref:t},d),{},{components:n})):a.createElement(g,r({ref:t},d))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var l=n.length,r=new Array(l);r[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[p]="string"==typeof e?e:i,r[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>p,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.16/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clingen-dosage-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],c={toc:s},d="wrapper";function p(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage 
sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}p.isMDXComponent=!0},53379:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>p,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.16/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.16/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],c={toc:s},d="wrapper";function p(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,i.kt)("td",{parentName:"tr",align:null},"object"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"disease"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"disease 
label")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classification"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"classification")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no reported evidence"),(0,i.kt)("li",{parentName:"ul"},"disputed"),(0,i.kt)("li",{parentName:"ul"},"limited"),(0,i.kt)("li",{parentName:"ul"},"moderate"),(0,i.kt)("li",{parentName:"ul"},"definitive"),(0,i.kt)("li",{parentName:"ul"},"strong"),(0,i.kt)("li",{parentName:"ul"},"refuted")))}p.isMDXComponent=!0},86806:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>p,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-json",id:"version-3.16/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clingen-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],c={toc:s},d="wrapper";function p(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n 
"observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingen"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"variantType"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined 
here.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"id"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. Alternatively a VID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"validated"),(0,i.kt)("td",{parentName:"tr",align:null},"boolean"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")))}p.isMDXComponent=!0},43906:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>c,default:()=>g,frontMatter:()=>s,metadata:()=>d,toc:()=>p});var a=n(87462),i=(n(67294),n(3905)),l=n(86806),r=n(53496),o=n(53379);const s={title:"ClinGen"},c=void 0,d={unversionedId:"data-sources/clingen",id:"version-3.16/data-sources/clingen",title:"ClinGen",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/clingen.mdx",sourceDirName:"data-sources",slug:"/data-sources/clingen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clingen.mdx",tags:[],version:"3.16",frontMatter:{title:"ClinGen"},sidebar:"version-3.16/docs",previous:{title:"Amino Acid Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/amino-acid-conservation"},next:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clinvar"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"ISCA Regions",id:"isca-regions",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Status levels",id:"status-levels",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"Conflict 
Resolution",id:"conflict-resolution",children:[{value:"Clinical significance priority",id:"clinical-significance-priority",children:[],level:3},{value:"Validation Priority",id:"validation-priority",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2},{value:"Dosage Sensitivity Map",id:"dosage-sensitivity-map",children:[{value:"TSV Source files",id:"tsv-source-files",children:[],level:3},{value:"Dosage Rating System",id:"dosage-rating-system",children:[],level:3},{value:"Download URL",id:"download-url-1",children:[],level:3},{value:"JSON Output",id:"json-output-1",children:[],level:3}],level:2},{value:"Gene-Disease Validity",id:"gene-disease-validity",children:[{value:"Source TSV",id:"source-tsv",children:[],level:3},{value:"Download URL",id:"download-url-2",children:[],level:3},{value:"Conflict Resolution",id:"conflict-resolution-1",children:[{value:"Multiple Classifications",id:"multiple-classifications",children:[],level:4},{value:"Multiple Dates",id:"multiple-dates",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output-2",children:[],level:3}],level:2}],u={toc:p},m="wrapper";function g(e){let{components:t,...n}=e;return(0,i.kt)(m,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinGen is a National Institutes of Health (NIH)-funded resource dedicated to building a central resource that defines the clinical relevance of genes and variants for use in precision medicine and research."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 
0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Heidi L. Rehm, Ph.D., Jonathan S. Berg, M.D., Ph.D., Lisa D. Brooks, Ph.D., Carlos D. Bustamante, Ph.D., James P. Evans, M.D., Ph.D., Melissa J. Landrum, Ph.D., David H. Ledbetter, Ph.D., Donna R. Maglott, Ph.D., Christa Lese Martin, Ph.D., Robert L. Nussbaum, M.D., Sharon E. Plon, M.D., Ph.D., Erin M. Ramos, Ph.D., Stephen T. Sherry, Ph.D., and Michael S. Watson, Ph.D., for ClinGen. ",(0,i.kt)("strong",{parentName:"p"},"ClinGen The Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"N Engl J Med 2015; 372:2235-2242 June 4, 2015 DOI: 10.1056/NEJMsr1406261.")))),(0,i.kt)("h2",{id:"isca-regions"},"ISCA Regions"),(0,i.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,i.kt)("p",null,"ClinGen contains only copy number variation variants, since the coordinates in ClinGen original file follow the same rule as BED format, the coordinates had to be adjusted to ","[BEGIN+1, END]","."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#bin chrom chromStart chromEnd name score strand thickStart thickEnd attrCount attrTags attrVals\nnsv530705 1 564405 8597804 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530706 1 564424 3262790 0 1 copy_number_loss pathogenic False Abnormal facial shape,Abnormality of cardiac morphology,Global developmental delay,Muscular hypotonia HP:0001252,HP:0001263,HP:0001627,HP:0001999,MedGen:CN001147,MedGen:CN001157,MedGen:CN001482,MedGen:CN001810\nnsv530707 1 564424 7068738 0 1 copy_number_loss pathogenic False Abnormality of cardiac morphology,Cleft upper lip,Failure to thrive,Global developmental delay,Intrauterine growth 
retardation,Microcephaly,Short stature HP:0000204,HP:0000252,HP:0001263,HP:0001508,HP:0001511,HP:0001627,HP:0004322,MedGen:C0349588,MedGen:C1845868,MedGen:C1853481,MedGen:C2364119,MedGen:CN000197,MedGen:CN001157,MedGen:CN001482\nnsv533512 1 564435 649748 0 1 copy_number_loss benign False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv931338 1 714078 4958499 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530300 1 728138 5066371 1 0 copy_number_gain pathogenic False Abnormality of cardiac morphology,Cleft palate,Global developmental delay HP:0000175,HP:0001263,HP:0001627,MedGen:C2240378,MedGen:CN001157,MedGen:CN001482\n")),(0,i.kt)("h4",{id:"status-levels"},"Status levels"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"We parse the ClinGen tsv file and extract the following:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"chrom"),(0,i.kt)("li",{parentName:"ul"},"chromStart (note this a 0-based coordinate)"),(0,i.kt)("li",{parentName:"ul"},"chromEnd"),(0,i.kt)("li",{parentName:"ul"},"attrTags"),(0,i.kt)("li",{parentName:"ul"},"attrVals")),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," are comma separated lists. ",(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," contains the field keys and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," contains the field values. 
We will parse the following keys from the two fields:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"parent (this will be used as the ID in our JSON output)"),(0,i.kt)("li",{parentName:"ul"},"clinical_int"),(0,i.kt)("li",{parentName:"ul"},"validated"),(0,i.kt)("li",{parentName:"ul"},"phenotype (this should be a string array)"),(0,i.kt)("li",{parentName:"ul"},"phenotype_id (this should be a string array)")),(0,i.kt)("p",null,"Observed losses and observed gains will be calculated from entries that share a common parent ID."),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"variants with a common parent ID and same coordinates are grouped",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"calculated observed losses, observed gains for each group"),(0,i.kt)("li",{parentName:"ul"},"Clinical significance and validation status are collapsed using the priority strategy described below"))),(0,i.kt)("li",{parentName:"ul"},"Variants with the same parent ID can have different coordinates (mapped to hg38)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"nsv491508 : chr14:105583663-106881350 and chr14:105605043-106766076 (only one example)"),(0,i.kt)("li",{parentName:"ul"},"we kept both variants")))),(0,i.kt)("h2",{id:"conflict-resolution"},"Conflict Resolution"),(0,i.kt)("h3",{id:"clinical-significance-priority"},"Clinical significance priority"),(0,i.kt)("p",null,"When there are a mixture of variants belonging to the same parent ID, we will choose the most pathogenic clinical significance from the available values. i.e. 
if 3 samples were deemed pathogenic and 2 samples were likely pathogenic, we would list the variant as pathogenic."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Priority")," (high to low)"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Priority"),(0,i.kt)("li",{parentName:"ul"},"Pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Benign"),(0,i.kt)("li",{parentName:"ul"},"Likely benign"),(0,i.kt)("li",{parentName:"ul"},"Uncertain significance")),(0,i.kt)("h3",{id:"validation-priority"},"Validation Priority"),(0,i.kt)("p",null,"When there are a mixture of variants belonging to same parent ID, we will set the validation status to true if any of the variants were validated."),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite"},"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite")),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(l.default,{mdxType:"CLINGENJSON"}),(0,i.kt)("h2",{id:"dosage-sensitivity-map"},"Dosage Sensitivity Map"),(0,i.kt)("p",null,"The Clinical Genome Resource (ClinGen) consortium is curating genes and regions of the genome to assess whether there is evidence to support that these genes/regions are dosage sensitive and should be targeted on a cytogenomic array. 
Nirvana reports these annotations for overlapping SVs."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Riggs ER, Nelson T, Merz A, Ackley T, Bunke B, Collins CD, Collinson MN, Fan YS, Goodenberger ML, Golden DM, Haglund-Hazy L, Krgovic D, Lamb AN, Lewis Z, Li G, Liu Y, Meck J, Neufeld-Kaiser W, Runke CK, Sanmann JN, Stavropoulos DJ, Strong E, Su M, Tayeh MK, Kokalj Vokac N, Thorland EC, Andersen E, Martin CL. ",(0,i.kt)("strong",{parentName:"p"},"Copy number variant discrepancy resolution using the ClinGen dosage sensitivity map results in updated clinical interpretations in ClinVar.")," ",(0,i.kt)("em",{parentName:"p"},"Hum Mutat. 2018 Nov;39(11):1650-1659. doi: 10.1002/humu.23610. 
PMID: 30095202; PMCID: PMC7374944.")))),(0,i.kt)("h3",{id:"tsv-source-files"},"TSV Source files"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Regions")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Region Curation Results\n#07 May,2019\n#Genomic Locations are reported on GRCh38 (hg38): GCF_000001405.36\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_region.cgi?id=key\n#ISCA ID ISCA Region Name cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nISCA-46299 Xp11.22 region (includes HUWE1) Xp11.22 tbd 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 22840365 20655035 26692240 2018-11-19\nISCA-46295 15q13.3 recurrent region (D-CHRNA7 to BP5) (includes CHRNA7 and OTUD7A) 15q13.3 chr15:31727418-32153204 3 Sufficient evidence for dosage pathogenicity 19898479 20236110 22775350 40 Dosage sensitivity unlikely 26968334 22420048 2018-05-10\nISCA-46291 7q11.23 recurrent distal region (includes HIP1, YWHAG) 7q11.23 chr7:75528718-76433859 2 Some evidence for dosage pathogenicity 21109226 16971481 1 Little evidence for dosage pathogenicity 21109226 27867344 2018-12-31\nISCA-46290 Xp11.22p11.23 recurrent region (includes SHROOM4) Xp11.22-p11.23 chrX: 48447780-52444264 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 19716111 21418194 25425167 2017-12-14 300801\n")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Genes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Gene Curation Results\n#24 May,2019\n#Genomic Locations are reported on GRCh37 
(hg19): GCF_000001405.13\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_gene.cgi?sym=Gene Symbol\n#Gene Symbol Gene ID cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nA4GALT 53947 22q13.2 chr22:43088121-43117307 30 Gene associated with autosomal recessive phenotype 0 No evidence available 2014-12-11 111400\nAAGAB 79719 15q23 chr15:67493013-67547536 3 Sufficient evidence for dosage pathogenicity 23064416 23000146 0 No evidence available 2013-02-28 148600\n")),(0,i.kt)("h3",{id:"dosage-rating-system"},"Dosage Rating System"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Rating"),(0,i.kt)("th",{parentName:"tr",align:null},"Possible Clinical Interpretation"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"0"),(0,i.kt)("td",{parentName:"tr",align:null},"No evidence to suggest that dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"1"),(0,i.kt)("td",{parentName:"tr",align:null},"Little evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"2"),(0,i.kt)("td",{parentName:"tr",align:null},"Emerging evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"3"),(0,i.kt)("td",{parentName:"tr",align:null},"Sufficient evidence suggesting 
dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"30"),(0,i.kt)("td",{parentName:"tr",align:null},"Gene associated with autosomal recessive phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"40"),(0,i.kt)("td",{parentName:"tr",align:null},"Dosage sensitivity unlikely")))),(0,i.kt)("p",null,"Reference: ",(0,i.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml"},"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml")),(0,i.kt)("h3",{id:"download-url-1"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.clinicalgenome.org/"},"ftp://ftp.clinicalgenome.org/")),(0,i.kt)("h3",{id:"json-output-1"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"ClinGenDosageJson"}),(0,i.kt)("h2",{id:"gene-disease-validity"},"Gene-Disease Validity"),(0,i.kt)("p",null,"The ClinGen Gene-Disease Clinical Validity curation process involves evaluating the strength of evidence supporting or refuting a claim that variation in a particular gene causes a particular disease. Nirvana reports these annotations for genes in the genes section of the JSON."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Strande NT, Riggs ER, Buchanan AH, et al. 
",(0,i.kt)("strong",{parentName:"p"},"Evaluating the Clinical Validity of Gene-Disease Associations: An Evidence-Based Framework Developed by the Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"Am J Hum Genet. 2017;100(6):895-906. doi:10.1016/j.ajhg.2017.04.015")))),(0,i.kt)("h3",{id:"source-tsv"},"Source TSV"),(0,i.kt)("p",null,"The source data comes in a CSV file that we convert to a TSV as follows:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"CLINGEN GENE VALIDITY CURATIONS\nFILE CREATED: 2019-05-28\nWEBPAGE: https://search.clinicalgenome.org/kb/gene-validity\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nGENE SYMBOL,GENE ID (HGNC),DISEASE LABEL,DISEASE ID (MONDO),SOP,CLASSIFICATION,ONLINE REPORT,CLASSIFICATION DATE\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nA2ML1,HGNC:23336,Noonan syndrome with multiple lentigines,MONDO_0007893,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/59b87033-dd91-4f1e-aec1-c9b1f5124b16--2018-06-07T14:37:47,2018-06-07T14:37:47.175Z\nA2ML1,HGNC:23336,cardiofaciocutaneous syndrome,MONDO_0015280,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/fc3c41d8-8497-489b-a350-c9e30016bc6a--2018-06-07T14:31:03,2018-06-07T14:31:03.696Z\nA2ML1,HGNC:23336,Costello syndrome,MONDO_0009026,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/ea72ba8d-cf62-44bc-86be-da64e3848eba--2018-06-07T14:34:05,2018-06-07T14:34:05.324Z\n")),(0,i.kt)("h3",{id:"download-url-2"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://search.clinicalgenome.org/kb/gene-validity.csv"},"https://search.clinicalgenome.org/kb/gene-validity.csv")),(0,i.kt)("h3",{id:"conflict-resolution-1"},"Conflict Resolution"),(0,i.kt)("h4",{id:"multiple-classifications"},"Multiple 
Classifications"),(0,i.kt)("p",null,"Here is an example of multiple classifications."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0010192 ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep EDNRB\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Moderate,https://search.clinicalgenome.org/kb/gene-validity/d7abbd45-7915-437b-849b-dea876bfc2f5--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Limited,https://search.clinicalgenome.org/kb/gene-validity/73ee9727-60c1-40fd-830f-08c2b513d2ee--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\n")),(0,i.kt)("p",null,"In such cases, we select the more severe classification."),(0,i.kt)("h4",{id:"multiple-dates"},"Multiple Dates"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0016419 ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep MUTYH\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9904,2017-05-24T00:00:00\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9902,2017-05-25T00:00:00\n")),(0,i.kt)("p",null,"If the classifications are the same, we should select the latest classification date."),(0,i.kt)("h3",{id:"json-output-2"},"JSON Output"),(0,i.kt)(o.default,{mdxType:"ClinGenGeneValidity"}))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/c8cd6ec4.39a0f4ec.js b/assets/js/a9ecceb6.8167576d.js similarity index 51% rename from assets/js/c8cd6ec4.39a0f4ec.js rename to assets/js/a9ecceb6.8167576d.js index 16316a1a..b8d505c7 100644 --- a/assets/js/c8cd6ec4.39a0f4ec.js +++ b/assets/js/a9ecceb6.8167576d.js @@ -1 +1 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7516,5337,1790],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>N});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},u=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},c=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,p=e.parentName,u=o(e,["components","mdxType","originalType","parentName"]),m=s(n),c=r,N=m["".concat(p,".").concat(c)]||m[c]||d[c]||l;return n?a.createElement(N,i(i({ref:t},u),{},{components:n})):a.createElement(N,i({ref:t},u))}));function N(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,i=new Array(l);i[0]=c;var o={};for(var p in t)hasOwnProperty.call(t,p)&&(o[p]=t[p]);o.originalType=e,o[m]="string"==typeof e?e:r,i[1]=o;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.2.5/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.2.5/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.2.5",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. Non-zero integer.")))))}m.isMDXComponent=!0},50092:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.2.5/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.2.5",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}m.isMDXComponent=!0},56462:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>p,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>u});var a=n(87462),r=(n(67294),n(3905)),l=n(22166),i=n(50092);const o={title:"1000 Genomes"},p=void 0,s={unversionedId:"data-sources/1000Genomes",id:"version-3.2.5/data-sources/1000Genomes",title:"1000 Genomes",description:"Overview",source:"@site/versioned_docs/version-3.2.5/data-sources/1000Genomes.mdx",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/1000Genomes",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/1000Genomes.mdx",tags:[],version:"3.2.5",frontMatter:{title:"1000 Genomes"},sidebar:"version-3.2.5/docs",previous:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/introduction/getting-started"},next:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/clinvar"}},u=[{value:"Overview",id:"overview",children:[],level:2},{value:"Populations",id:"populations",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing",children:[{value:"Conflict Resolution",id:"conflict-resolution",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Structural Variants",id:"structural-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing-1",children:[],level:3},{value:"Converting VCF svTypes to SO sequence 
alterations",id:"converting-vcf-svtypes-to-so-sequence-alterations",children:[{value:"Exceptions",id:"exceptions",children:[],level:4}],level:3}],level:2},{value:"JSON Output",id:"json-output-1",children:[],level:2}],m={toc:u},d="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. ",(0,r.kt)("em",{parentName:"p"},"Nature 526"),", 75\u201381 (2015). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/nature15394"},"https://doi.org/10.1038/nature15394")))),(0,r.kt)("h2",{id:"populations"},"Populations"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"The super population membership can be found here: (",(0,r.kt)("a",{parentName:"li",href:"http://www.1000genomes.org/category/population/"},"http://www.1000genomes.org/category/population/"),")"),(0,r.kt)("li",{parentName:"ul"},"We want to capture the allele frequencies for all 26 populations as well as the 5 super populations and the total population.")),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing"},"VCF File Parsing"),(0,r.kt)("p",null,"The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. 
Our team converted the original data to VCF entries with allele counts and allele numbers like the following."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633\n")),(0,r.kt)("p",null,"The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored)."),(0,r.kt)("p",null,"We parse the VCF file and extract the following fields from INFO:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"AA"),(0,r.kt)("li",{parentName:"ul"},"AC"),(0,r.kt)("li",{parentName:"ul"},"AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AN"),(0,r.kt)("li",{parentName:"ul"},"AMR_AN"),(0,r.kt)("li",{parentName:"ul"},"AFR_AN"),(0,r.kt)("li",{parentName:"ul"},"EUR_AN"),(0,r.kt)("li",{parentName:"ul"},"SAS_AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AC"),(0,r.kt)("li",{parentName:"ul"},"AMR_AC"),(0,r.kt)("li",{parentName:"ul"},"AFR_AC"),(0,r.kt)("li",{parentName:"ul"},"EUR_AC"),(0,r.kt)("li",{parentName:"ul"},"SAS_AC")),(0,r.kt)("h4",{id:"conflict-resolution"},"Conflict Resolution"),(0,r.kt)("p",null,"We have observed conflicting allele frequency information in the source. Take the following example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;\n1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;\n")),(0,r.kt)("p",null,"That is, the variant 1-20505705-C-CTG has conflicting entries. 
To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Chromosome"),(0,r.kt)("th",{parentName:"tr",align:"left"},"#"," of alleles"),(0,r.kt)("th",{parentName:"tr",align:"center"},"#"," of conflicting alleles"),(0,r.kt)("th",{parentName:"tr",align:"left"},"percentage"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"chrX"),(0,r.kt)("td",{parentName:"tr",align:"left"},"834800"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2733"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.33%")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"Total"),(0,r.kt)("td",{parentName:"tr",align:"left"},"21413098"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2743"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.013%")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Currently"),", we removed the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep allele frequencies of all other alleles in the VCF line."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Potential Alternate Solutions")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Remove all alleles that are contained in the vcf lines which have conflicting allele. 
(Recommended by 1000 genome group Holly Zheng-Bradley, 7/29/2015)"),(0,r.kt)("li",{parentName:"ul"},"Recalculate the allele frequency for the conflicting allele."),(0,r.kt)("li",{parentName:"ul"},"Pick the allele frequency that has the highest data support.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/"},"GRCh37"),"\n",(0,r.kt)("a",{parentName:"p",href:"http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/"},"GRCh38")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSONSNV"}),(0,r.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing-1"},"VCF File Parsing"),(0,r.kt)("p",null,"The VCF files contain entries like the following:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A ,,, 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4\n")),(0,r.kt)("p",null,"Please note that, CNVs are allele-specific. 
For example, HG00096 is effectively copy number 4, which would be a net gain on chr22."),(0,r.kt)("p",null,"1000 Genomes contains 5 types of structural variants:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CNV"),(0,r.kt)("li",{parentName:"ul"},"DEL"),(0,r.kt)("li",{parentName:"ul"},"DUP"),(0,r.kt)("li",{parentName:"ul"},"INS"),(0,r.kt)("li",{parentName:"ul"},"INV")),(0,r.kt)("p",null,"Since data of 1000 genomes is provided in VCF format, we assume that the coordinates follow the vcf format, i.e., there is a padding base for symbolic alleles. So all the interval can be interpreted as ","[BEGIN+1, END]",".\nSimilarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Insertion issues")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"END = BEGIN for 6/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+2 for 93/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+3 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+4 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END \u2013 BEGIN range from 5 to 1156 for others.")),(0,r.kt)("h3",{id:"converting-vcf-svtypes-to-so-sequence-alterations"},"Converting VCF svTypes to SO sequence alterations"),(0,r.kt)("p",null,"The svType will be captured in our JSON file under the ",(0,r.kt)("a",{parentName:"p",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"sequenceAlteration")," key. 
Here's the translation we'll use according to svType in 1000 Genomes."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"svType"),(0,r.kt)("th",{parentName:"tr",align:null},"Alternative Alleles contain "),(0,r.kt)("th",{parentName:"tr",align:null},"sequenceAlteration"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"ALU"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DUP"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"CNV"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain (observed_gains >0 and observed_losses =0) ",(0,r.kt)("br",null),"copy_number_loss\xa0(observed_gains = 0 and observed_losses > 0) ",(0,r.kt)("br",null),"copy_number_variation 
(otherwise)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DEL"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_loss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"LINE1"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"SVA"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INV"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"inversion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INS"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"insertion")))),(0,r.kt)("h4",{id:"exceptions"},"Exceptions"),(0,r.kt)("p",null,(0,r.kt)("em",{parentName:"p"},"We discard structural variants without END")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n21 9495848 esv3646347 A 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0\n")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"CNVs in chrY")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"No other types of structural variants exist in chrY"),(0,r.kt)("li",{parentName:"ul"},'Since copy number is provided in genotype field, we directly parse the copy number from "CN" field.'),(0,r.kt)("li",{parentName:"ul"},"For most CNVs in chrY, the reference copy 
number is 1, but the refence number for CNVs in segmental duplication sites is 2 ("," in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG00105 HG00107 HG00108\nY 2888555 CNV_Y_2888555_3014661 T 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394\nY 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C , 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99\n")),(0,r.kt)("h2",{id:"json-output-1"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4203,4648,6602],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>N});var a=n(7294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},u=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},c=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,p=e.parentName,u=o(e,["components","mdxType","originalType","parentName"]),m=s(n),c=r,N=m["".concat(p,".").concat(c)]||m[c]||d[c]||l;return n?a.createElement(N,i(i({ref:t},u),{},{components:n})):a.createElement(N,i({ref:t},u))}));function N(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,i=new Array(l);i[0]=c;var o={};for(var p in t)hasOwnProperty.call(t,p)&&(o[p]=t[p]);o.originalType=e,o[m]="string"==typeof e?e:r,i[1]=o;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(7462),r=(n(7294),n(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-snv-json.md",tags:[],version:"current",frontMatter:{}},p=[],s={toc:p},u="wrapper";function 
m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}m.isMDXComponent=!0},2590:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(7462),r=(n(7294),n(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-sv-json.md",tags:[],version:"current",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}m.isMDXComponent=!0},7234:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>p,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>u});var a=n(7462),r=(n(7294),n(3905)),l=n(1888),i=n(2590);const o={title:"1000 Genomes"},p=void 0,s={unversionedId:"data-sources/1000Genomes",id:"data-sources/1000Genomes",title:"1000 Genomes",description:"Overview",source:"@site/docs/data-sources/1000Genomes.mdx",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes.mdx",tags:[],version:"current",frontMatter:{title:"1000 Genomes"},sidebar:"docs",previous:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started"},next:{title:"Amino Acid Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation"}},u=[{value:"Overview",id:"overview",children:[],level:2},{value:"Populations",id:"populations",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing",children:[{value:"Conflict Resolution",id:"conflict-resolution",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Structural Variants",id:"structural-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing-1",children:[],level:3},{value:"Converting VCF svTypes to SO sequence 
alterations",id:"converting-vcf-svtypes-to-so-sequence-alterations",children:[{value:"Exceptions",id:"exceptions",children:[],level:4}],level:3}],level:2},{value:"JSON Output",id:"json-output-1",children:[],level:2}],m={toc:u},d="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. ",(0,r.kt)("em",{parentName:"p"},"Nature 526"),", 75\u201381 (2015). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/nature15394"},"https://doi.org/10.1038/nature15394")))),(0,r.kt)("h2",{id:"populations"},"Populations"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"The super population membership can be found here: (",(0,r.kt)("a",{parentName:"li",href:"http://www.1000genomes.org/category/population/"},"http://www.1000genomes.org/category/population/"),")"),(0,r.kt)("li",{parentName:"ul"},"We want to capture the allele frequencies for all 26 populations as well as the 5 super populations and the total population.")),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing"},"VCF File Parsing"),(0,r.kt)("p",null,"The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. 
Our team converted the original data to VCF entries with allele counts and allele numbers like the following."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633\n")),(0,r.kt)("p",null,"The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored)."),(0,r.kt)("p",null,"We parse the VCF file and extract the following fields from INFO:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"AA"),(0,r.kt)("li",{parentName:"ul"},"AC"),(0,r.kt)("li",{parentName:"ul"},"AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AN"),(0,r.kt)("li",{parentName:"ul"},"AMR_AN"),(0,r.kt)("li",{parentName:"ul"},"AFR_AN"),(0,r.kt)("li",{parentName:"ul"},"EUR_AN"),(0,r.kt)("li",{parentName:"ul"},"SAS_AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AC"),(0,r.kt)("li",{parentName:"ul"},"AMR_AC"),(0,r.kt)("li",{parentName:"ul"},"AFR_AC"),(0,r.kt)("li",{parentName:"ul"},"EUR_AC"),(0,r.kt)("li",{parentName:"ul"},"SAS_AC")),(0,r.kt)("h4",{id:"conflict-resolution"},"Conflict Resolution"),(0,r.kt)("p",null,"We have observed conflicting allele frequency information in the source. Take the following example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;\n1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;\n")),(0,r.kt)("p",null,"That is, the variant 1-20505705-C-CTG has conflicting entries. 
To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Chromosome"),(0,r.kt)("th",{parentName:"tr",align:"left"},"#"," of alleles"),(0,r.kt)("th",{parentName:"tr",align:"center"},"#"," of conflicting alleles"),(0,r.kt)("th",{parentName:"tr",align:"left"},"percentage"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"chrX"),(0,r.kt)("td",{parentName:"tr",align:"left"},"834800"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2733"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.33%")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"Total"),(0,r.kt)("td",{parentName:"tr",align:"left"},"21413098"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2743"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.013%")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Currently"),", we removed the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep allele frequencies of all other alleles in the VCF line."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Potential Alternate Solutions")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Remove all alleles that are contained in the vcf lines which have conflicting allele. 
(Recommended by 1000 genome group Holly Zheng-Bradley, 7/29/2015)"),(0,r.kt)("li",{parentName:"ul"},"Recalculate the allele frequency for the conflicting allele."),(0,r.kt)("li",{parentName:"ul"},"Pick the allele frequency that has the highest data support.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/"},"GRCh37"),"\n",(0,r.kt)("a",{parentName:"p",href:"http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/"},"GRCh38")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSONSNV"}),(0,r.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing-1"},"VCF File Parsing"),(0,r.kt)("p",null,"The VCF files contain entries like the following:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A ,,, 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4\n")),(0,r.kt)("p",null,"Please note that, CNVs are allele-specific. 
For example, HG00096 is effectively copy number 4, which would be a net gain on chr22."),(0,r.kt)("p",null,"1000 Genomes contains 5 types of structural variants:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CNV"),(0,r.kt)("li",{parentName:"ul"},"DEL"),(0,r.kt)("li",{parentName:"ul"},"DUP"),(0,r.kt)("li",{parentName:"ul"},"INS"),(0,r.kt)("li",{parentName:"ul"},"INV")),(0,r.kt)("p",null,"Since data of 1000 genomes is provided in VCF format, we assume that the coordinates follow the vcf format, i.e., there is a padding base for symbolic alleles. So all the interval can be interpreted as ","[BEGIN+1, END]",".\nSimilarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Insertion issues")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"END = BEGIN for 6/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+2 for 93/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+3 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+4 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END \u2013 BEGIN range from 5 to 1156 for others.")),(0,r.kt)("h3",{id:"converting-vcf-svtypes-to-so-sequence-alterations"},"Converting VCF svTypes to SO sequence alterations"),(0,r.kt)("p",null,"The svType will be captured in our JSON file under the ",(0,r.kt)("a",{parentName:"p",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"sequenceAlteration")," key. 
Here's the translation we'll use according to svType in 1000 Genomes."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"svType"),(0,r.kt)("th",{parentName:"tr",align:null},"Alternative Alleles contain "),(0,r.kt)("th",{parentName:"tr",align:null},"sequenceAlteration"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"ALU"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DUP"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"CNV"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain (observed_gains >0 and observed_losses =0) ",(0,r.kt)("br",null),"copy_number_loss\xa0(observed_gains = 0 and observed_losses > 0) ",(0,r.kt)("br",null),"copy_number_variation 
(otherwise)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DEL"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_loss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"LINE1"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"SVA"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INV"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"inversion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INS"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"insertion")))),(0,r.kt)("h4",{id:"exceptions"},"Exceptions"),(0,r.kt)("p",null,(0,r.kt)("em",{parentName:"p"},"We discard structural variants without END")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n21 9495848 esv3646347 A 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0\n")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"CNVs in chrY")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"No other types of structural variants exist in chrY"),(0,r.kt)("li",{parentName:"ul"},'Since copy number is provided in genotype field, we directly parse the copy number from "CN" field.'),(0,r.kt)("li",{parentName:"ul"},"For most CNVs in chrY, the reference copy 
number is 1, but the refence number for CNVs in segmental duplication sites is 2 ("," in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG00105 HG00107 HG00108\nY 2888555 CNV_Y_2888555_3014661 T 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394\nY 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C , 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99\n")),(0,r.kt)("h2",{id:"json-output-1"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/a9ecceb6.b6d287a4.js b/assets/js/a9ecceb6.b6d287a4.js deleted file mode 100644 index 9e8ccb9c..00000000 --- a/assets/js/a9ecceb6.b6d287a4.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4203,4648,6602],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>N});var a=n(67294);function r(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},u=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},c=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,p=e.parentName,u=o(e,["components","mdxType","originalType","parentName"]),m=s(n),c=r,N=m["".concat(p,".").concat(c)]||m[c]||d[c]||l;return n?a.createElement(N,i(i({ref:t},u),{},{components:n})):a.createElement(N,i({ref:t},u))}));function N(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,i=new Array(l);i[0]=c;var o={};for(var p in t)hasOwnProperty.call(t,p)&&(o[p]=t[p]);o.originalType=e,o[m]="string"==typeof e?e:r,i[1]=o;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-snv-json.md",tags:[],version:"current",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. Non-zero integer.")))))}m.isMDXComponent=!0},92590:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-sv-json.md",tags:[],version:"current",frontMatter:{}},p=[],s={toc:p},u="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(u,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}m.isMDXComponent=!0},7234:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>p,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>u});var a=n(87462),r=(n(67294),n(3905)),l=n(41888),i=n(92590);const o={title:"1000 Genomes"},p=void 0,s={unversionedId:"data-sources/1000Genomes",id:"data-sources/1000Genomes",title:"1000 Genomes",description:"Overview",source:"@site/docs/data-sources/1000Genomes.mdx",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes.mdx",tags:[],version:"current",frontMatter:{title:"1000 Genomes"},sidebar:"docs",previous:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started"},next:{title:"Amino Acid Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation"}},u=[{value:"Overview",id:"overview",children:[],level:2},{value:"Populations",id:"populations",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing",children:[{value:"Conflict Resolution",id:"conflict-resolution",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Structural Variants",id:"structural-variants",children:[{value:"VCF File Parsing",id:"vcf-file-parsing-1",children:[],level:3},{value:"Converting VCF svTypes to SO sequence 
alterations",id:"converting-vcf-svtypes-to-so-sequence-alterations",children:[{value:"Exceptions",id:"exceptions",children:[],level:4}],level:3}],level:2},{value:"JSON Output",id:"json-output-1",children:[],level:2}],m={toc:u},d="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The goal of the 1000 Genomes Project was to find most genetic variants with frequencies of at least 1% in the populations studied. It was the first project to sequence the genomes of a large number of people, to provide a comprehensive resource on human genetic variation. Data from the 1000 Genomes Project was quickly made available to the worldwide scientific community through freely accessible public databases."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sudmant, P., Rausch, T., Gardner, E. et al. An integrated map of structural variation in 2,504 human genomes. ",(0,r.kt)("em",{parentName:"p"},"Nature 526"),", 75\u201381 (2015). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/nature15394"},"https://doi.org/10.1038/nature15394")))),(0,r.kt)("h2",{id:"populations"},"Populations"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"The super population membership can be found here: (",(0,r.kt)("a",{parentName:"li",href:"http://www.1000genomes.org/category/population/"},"http://www.1000genomes.org/category/population/"),")"),(0,r.kt)("li",{parentName:"ul"},"We want to capture the allele frequencies for all 26 populations as well as the 5 super populations and the total population.")),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing"},"VCF File Parsing"),(0,r.kt)("p",null,"The original VCF files come with allele frequency fields (e.g. ALL_AF, AMR_AF) but we recompute them using allele counts and allele numbers in order to get 6 digit precision. The allele counts and allele numbers (e.g. AMR_AC, AMR_AN) are not expressed in the INFO field. Instead the genotypes need to be parsed to compute that information. 
Our team converted the original data to VCF entries with allele counts and allele numbers like the following."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 15274 rs62636497 A G,T 100 PASS AC=1739,3210;AF=0.347244,0.640974;AN=5008;NS=2504;DP=23255;EAS_AF=0.4812,0.5188;AMR_AF=0.2752,0.7205;AFR_AF=0.323,0.6369;EUR_AF=0.2922,0.7078;SAS_AF=0.3497,0.6472;AA=g|||;VT=SNP;MULTI_ALLELIC;EAS_AN=1008;EAS_AC=485,523;EUR_AN=1006;EUR_AC=294,712;AFR_AN=1322;AFR_AC=427,842;AMR_AN=694;AMR_AC=191,500;SAS_AN=978;SAS_AC=342,633\n")),(0,r.kt)("p",null,"The ancestral allele, if it exists, is the first value in the pipe separated AA fields (the Indel specific REF, ALT, IndelType fields are ignored)."),(0,r.kt)("p",null,"We parse the VCF file and extract the following fields from INFO:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"AA"),(0,r.kt)("li",{parentName:"ul"},"AC"),(0,r.kt)("li",{parentName:"ul"},"AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AN"),(0,r.kt)("li",{parentName:"ul"},"AMR_AN"),(0,r.kt)("li",{parentName:"ul"},"AFR_AN"),(0,r.kt)("li",{parentName:"ul"},"EUR_AN"),(0,r.kt)("li",{parentName:"ul"},"SAS_AN"),(0,r.kt)("li",{parentName:"ul"},"EAS_AC"),(0,r.kt)("li",{parentName:"ul"},"AMR_AC"),(0,r.kt)("li",{parentName:"ul"},"AFR_AC"),(0,r.kt)("li",{parentName:"ul"},"EUR_AC"),(0,r.kt)("li",{parentName:"ul"},"SAS_AC")),(0,r.kt)("h4",{id:"conflict-resolution"},"Conflict Resolution"),(0,r.kt)("p",null,"We have observed conflicting allele frequency information in the source. Take the following example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 20505705 rs35377696 C CTCTG,CTG,CTGTG 100 PASS AC=46,1513,152;AF=0.0091853,0.302117,0.0303514;\n1 20505705 rs35377696 C CTG 100 PASS AC=4;AF=0.000798722;\n")),(0,r.kt)("p",null,"That is, the variant 1-20505705-C-CTG has conflicting entries. 
To get an idea of how frequently we observe this, here is a table summarizing ChrX and all chromosomes. Note that almost all such entries are found in ChrX."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Chromosome"),(0,r.kt)("th",{parentName:"tr",align:"left"},"#"," of alleles"),(0,r.kt)("th",{parentName:"tr",align:"center"},"#"," of conflicting alleles"),(0,r.kt)("th",{parentName:"tr",align:"left"},"percentage"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"chrX"),(0,r.kt)("td",{parentName:"tr",align:"left"},"834800"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2733"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.33%")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"Total"),(0,r.kt)("td",{parentName:"tr",align:"left"},"21413098"),(0,r.kt)("td",{parentName:"tr",align:"center"},"2743"),(0,r.kt)("td",{parentName:"tr",align:"left"},"0.013%")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Currently"),", we removed the allele frequency of the conflicting allele (i.e., insertion TG in the example) but keep allele frequencies of all other alleles in the VCF line."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Potential Alternate Solutions")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Remove all alleles that are contained in the vcf lines which have conflicting allele. 
(Recommended by 1000 genome group Holly Zheng-Bradley, 7/29/2015)"),(0,r.kt)("li",{parentName:"ul"},"Recalculate the allele frequency for the conflicting allele."),(0,r.kt)("li",{parentName:"ul"},"Pick the allele frequency that has the highest data support.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/"},"GRCh37"),"\n",(0,r.kt)("a",{parentName:"p",href:"http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/"},"GRCh38")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSONSNV"}),(0,r.kt)("h2",{id:"structural-variants"},"Structural Variants"),(0,r.kt)("h3",{id:"vcf-file-parsing-1"},"VCF File Parsing"),(0,r.kt)("p",null,"The VCF files contain entries like the following:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n22 16050654 esv3647175;esv3647176;esv3647177;esv3647178 A ,,, 100 PASS AC=9,87,599,20;AF=0.00179712,0.0173722,0.119609,0.00399361;AN=5008;CS=DUP_gs;END=16063474;NS=2504;SVTYPE=CNV;DP=22545;EAS_AF=0.001,0.0169,0.2361,0.0099;AMR_AF=0,0.0101,0.219,0.0072;AFR_AF=0.0061,0.0363,0.0053,0;EUR_AF=0,0.007,0.0944,0.003;SAS_AF=0,0.0082,0.1094,0.002;VT=SV GT 3|0 0|0 0|0 0|0 0|0 0|0 0|4\n")),(0,r.kt)("p",null,"Please note that, CNVs are allele-specific. 
For example, HG00096 is effectively copy number 4, which would be a net gain on chr22."),(0,r.kt)("p",null,"1000 Genomes contains 5 types of structural variants:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CNV"),(0,r.kt)("li",{parentName:"ul"},"DEL"),(0,r.kt)("li",{parentName:"ul"},"DUP"),(0,r.kt)("li",{parentName:"ul"},"INS"),(0,r.kt)("li",{parentName:"ul"},"INV")),(0,r.kt)("p",null,"Since data of 1000 genomes is provided in VCF format, we assume that the coordinates follow the vcf format, i.e., there is a padding base for symbolic alleles. So all the interval can be interpreted as ","[BEGIN+1, END]",".\nSimilarly, for all other variant types except insertion, END is far larger than BEGIN. The distribution of BEGIN and END for insertions is summarized below."),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Insertion issues")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"END = BEGIN for 6/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+2 for 93/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+3 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END = BEGIN+4 for 11/165"),(0,r.kt)("li",{parentName:"ul"},"END \u2013 BEGIN range from 5 to 1156 for others.")),(0,r.kt)("h3",{id:"converting-vcf-svtypes-to-so-sequence-alterations"},"Converting VCF svTypes to SO sequence alterations"),(0,r.kt)("p",null,"The svType will be captured in our JSON file under the ",(0,r.kt)("a",{parentName:"p",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"sequenceAlteration")," key. 
Here's the translation we'll use according to svType in 1000 Genomes."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"svType"),(0,r.kt)("th",{parentName:"tr",align:null},"Alternative Alleles contain "),(0,r.kt)("th",{parentName:"tr",align:null},"sequenceAlteration"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"ALU"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DUP"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"CNV"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_gain (observed_gains >0 and observed_losses =0) ",(0,r.kt)("br",null),"copy_number_loss\xa0(observed_gains = 0 and observed_losses > 0) ",(0,r.kt)("br",null),"copy_number_variation 
(otherwise)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DEL"),(0,r.kt)("td",{parentName:"tr",align:null},"TRUE"),(0,r.kt)("td",{parentName:"tr",align:null},"copy_number_loss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"LINE1"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"SVA"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INV"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"inversion")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"INS"),(0,r.kt)("td",{parentName:"tr",align:null},"FALSE"),(0,r.kt)("td",{parentName:"tr",align:null},"insertion")))),(0,r.kt)("h4",{id:"exceptions"},"Exceptions"),(0,r.kt)("p",null,(0,r.kt)("em",{parentName:"p"},"We discard structural variants without END")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00097 HG00099 HG00100 HG00101 HG00102 HG00103\n21 9495848 esv3646347 A 100 PASS AC=1543;AF=0.308107;AN=5008;CS=L1_umary;MEINFO=LINE1,5669,6005,+;NS=2504;SVLEN=336;SVTYPE=LINE1;TSD=null;DP=20015;EAS_AF=0.3125;AMR_AF=0.2911;AFR_AF=0.3026;EUR_AF=0.2922;SAS_AF=0.3395;VT=SV GT 0|0 1|1 1|0 0|1 1|0 1|0 0|0\n")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"CNVs in chrY")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"No other types of structural variants exist in chrY"),(0,r.kt)("li",{parentName:"ul"},'Since copy number is provided in genotype field, we directly parse the copy number from "CN" field.'),(0,r.kt)("li",{parentName:"ul"},"For most CNVs in chrY, the reference copy 
number is 1, but the refence number for CNVs in segmental duplication sites is 2 ("," in the 2nd example). All segmental duplication calls have identifiers starting with GS_SD_M2.")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 HG00101 HG00103 HG00105 HG00107 HG00108\nY 2888555 CNV_Y_2888555_3014661 T 100 PASS AC=1;AF=0.000817661;AN=1223;END=3014661;NS=1233;SVTYPE=CNV;AMR_AF=0.0000;AFR_AF=0.0000;EUR_AF=0.0000;SAS_AF=0.0019;EAS_AF=0.0000;VT=SV GT:CN:CNL:CNP:CNQ:GP:GQ:PL 0:1:-1000,0,-58.45:-1000,0,-61.55:99:0,-61.55:99:0,585 0:1:-296.36,0,-16.6:-300.46,0,-19.7:99:0,-19.7:99:0,166 0:1:-1000,0,-39.44:-1000,0,-42.54:99:0,-42.54:99:0,394\nY 6128381 GS_SD_M2_Y_6128381_6230094_Y_9650284_9752225 C , 100 PASS AC=4,2;AF=0.00327065,0.00163532;AN=1223;END=6230094;NS=1233;SVTYPE=CNV;AMR_AF=0.0029,0.0029;AFR_AF=0.0016,0.0016;EUR_AF=0.0000,0.0000;SAS_AF=0.0038,0.0000;EAS_AF=0.0000,0.0000;VT=SV;EX_TARGET GT:CN:CNL:CNP:CNQ:GP:GQ 0:2:-1000,-138.78,0,-38.53:-1000,-141.27,0,-41.33:99:0,-141.27,-41.33:99 0:2:-1000,-53.32,0,-17.85:-1000,-55.81,0,-20.64:99:0,-55.81,-20.64:99 0:2:-1000,-71.83,0,-32.5:-1000,-74.32,0,-35.29:99:0,-74.32,-35.29:99 0:2:-1000,-60.96,0,-20.29:-1000,-63.45,0,-23.08:99:0,-63.45,-23.08:99 0:2:-1000,-77.6,0,-31.45:-1000,-80.09,0,-34.24:99:0,-80.09,-34.24:99\n")),(0,r.kt)("h2",{id:"json-output-1"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSONSV"}))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/aa30c44a.24d963b2.js b/assets/js/aa30c44a.24d963b2.js deleted file mode 100644 index b0a86c3f..00000000 --- a/assets/js/aa30c44a.24d963b2.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7751],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var a=n(67294);function r(t,e,n){return e in 
t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},c=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),s=u(n),c=r,g=s["".concat(p,".").concat(c)]||s[c]||d[c]||l;return n?a.createElement(g,o(o({ref:e},m),{},{components:n})):a.createElement(g,o({ref:e},m))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.14/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.14/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],u={toc:p},m="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/ab2bd8d8.3c9c473f.js b/assets/js/ab2bd8d8.3c9c473f.js deleted file mode 100644 index cb2accb9..00000000 --- a/assets/js/ab2bd8d8.3c9c473f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[104],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),u=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=u(e.components);return r.createElement(p.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},s=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,l=e.originalType,p=e.parentName,c=i(e,["components","mdxType","originalType","parentName"]),d=u(n),s=a,f=d["".concat(p,".").concat(s)]||d[s]||m[s]||l;return n?r.createElement(f,o(o({ref:t},c),{},{components:n})):r.createElement(f,o({ref:t},c))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var p in 
t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[d]="string"==typeof e?e:a,o[1]=i;for(var u=2;u{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/topmed-json",id:"version-3.16/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/topmed-json.md",tags:[],version:"3.16",frontMatter:{}},p=[],u={toc:p},c="wrapper";function d(e){let{components:t,...n}=e;return(0,a.kt)(c,(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n "failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. 
Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allHc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/abda0f14.a29fb808.js b/assets/js/abda0f14.a29fb808.js new file mode 100644 index 00000000..719d26a5 --- /dev/null +++ b/assets/js/abda0f14.a29fb808.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[829],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>m});var a=n(7294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return 
a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),u=c(n),g=i,m=u["".concat(s,".").concat(g)]||u[g]||d[g]||r;return n?a.createElement(m,l(l({ref:t},p),{},{components:n})):a.createElement(m,l({ref:t},p))}));function m(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=g;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[u]="string"==typeof e?e:i,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(7462),i=(n(7294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-dosage-json.md",tags:[],version:"current",frontMatter:{}},s=[],c={toc:s},p="wrapper";function u(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting 
dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/abda0f14.d93732fd.js b/assets/js/abda0f14.d93732fd.js deleted file mode 100644 index b52bcaa3..00000000 --- a/assets/js/abda0f14.d93732fd.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[829],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>m});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var 
t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),u=c(n),g=i,m=u["".concat(s,".").concat(g)]||u[g]||d[g]||r;return n?a.createElement(m,l(l({ref:t},p),{},{components:n})):a.createElement(m,l({ref:t},p))}));function m(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=g;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[u]="string"==typeof e?e:i,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-dosage-json.md",tags:[],version:"current",frontMatter:{}},s=[],c={toc:s},p="wrapper";function 
u(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/ac509df2.f922ab82.js b/assets/js/ac509df2.f922ab82.js deleted file mode 100644 index 87934668..00000000 --- a/assets/js/ac509df2.f922ab82.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9496],{3905:(n,e,t)=>{t.d(e,{Zo:()=>d,kt:()=>u});var a=t(67294);function i(n,e,t){return e in n?Object.defineProperty(n,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):n[e]=t,n}function o(n,e){var t=Object.keys(n);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(n);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(n,e).enumerable}))),t.push.apply(t,a)}return t}function r(n){for(var e=1;e=0||(i[t]=n[t]);return i}(n,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(n);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(n,t)&&(i[t]=n[t])}return i}var c=a.createContext({}),l=function(n){var e=a.useContext(c),t=e;return n&&(t="function"==typeof n?n(e):r(r({},e),n)),t},d=function(n){var e=l(n.components);return 
a.createElement(c.Provider,{value:e},n.children)},p="mdxType",g={inlineCode:"code",wrapper:function(n){var e=n.children;return a.createElement(a.Fragment,{},e)}},m=a.forwardRef((function(n,e){var t=n.components,i=n.mdxType,o=n.originalType,c=n.parentName,d=s(n,["components","mdxType","originalType","parentName"]),p=l(t),m=i,u=p["".concat(c,".").concat(m)]||p[m]||g[m]||o;return t?a.createElement(u,r(r({ref:e},d),{},{components:t})):a.createElement(u,r({ref:e},d))}));function u(n,e){var t=arguments,i=e&&e.mdxType;if("string"==typeof n||i){var o=t.length,r=new Array(o);r[0]=m;var s={};for(var c in e)hasOwnProperty.call(e,c)&&(s[c]=e[c]);s.originalType=n,s[p]="string"==typeof n?n:i,r[1]=s;for(var l=2;l{t.r(e),t.d(e,{contentTitle:()=>r,default:()=>p,frontMatter:()=>o,metadata:()=>s,toc:()=>c});var a=t(87462),i=(t(67294),t(3905));const o={title:"Parsing Nirvana JSON"},r=void 0,s={unversionedId:"introduction/parsing-json",id:"version-3.18/introduction/parsing-json",title:"Parsing Nirvana JSON",description:"Why JSON?",source:"@site/versioned_docs/version-3.18/introduction/parsing-json.md",sourceDirName:"introduction",slug:"/introduction/parsing-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/parsing-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/introduction/parsing-json.md",tags:[],version:"3.18",frontMatter:{title:"Parsing Nirvana JSON"},sidebar:"docs",previous:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/getting-started"},next:{title:"Annotating COVID-19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/covid19"}},c=[{value:"Why JSON?",id:"why-json",children:[{value:"What do other annotators use?",id:"what-do-other-annotators-use",children:[],level:3},{value:"What do we gain by using JSON?",id:"what-do-we-gain-by-using-json",children:[],level:3}],level:2},{value:"Parsing 
JSON",id:"parsing-json",children:[{value:"Organization",id:"organization",children:[],level:3},{value:"JASIX",id:"jasix",children:[],level:3}],level:2}],l={toc:c},d="wrapper";function p(n){let{components:e,...o}=n;return(0,i.kt)(d,(0,a.Z)({},l,o,{components:e,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"why-json"},"Why JSON?"),(0,i.kt)("p",null,"VCF is a fantastic file format that was developed during the methods development activities within the 1000 Genomes Project. Prior to that, variant callers were outputting information into a variety of tab-delimited formats. Sometimes based on existing standards (like GFF), while most were proprietary. The primary intent of VCF files was to provide a human-readable, standardized representation of genetic variants. Similar to SAM/BAM files, VCF files used BCF files as their binary counterpart."),(0,i.kt)("p",null,"In the very beginning, Nirvana offered VCF output for annotation. While many variant annotators offer an option to output VCF files, one could argue if they are still human-readable. Here's an example from a VCF file produced by VEP v102:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"chr3 107840527 . 
A ATTTTTTTTT,AT,ATTTTTTTT 153.51 PASS AN=6;MQ=244.10;\nSOR=1.739;QD=2.24;DP=57;AF=0.500,0.167,0.333;FS=0.000;AC=3,1,2;CSQ=TTTTTTTTT|\nintron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|\nTranscript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-132_622-124dup|||||||\nrs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||\n|||||||||0.792|-0.109757,T|intron_variant&non_coding_transcript_variant|MODIFIER|\nLINC00635|ENSG00000241469|Transcript|ENST00000608506.6|lncRNA||4/4|\nENST00000608506.6:n.622-124dup|||||||rs35564779||-1||HGNC|HGNC:27184|||5|||||||||\nEnsembl||||||||||||||||||||||||||||||||||||||||||||0.932|-0.075622,TTTTTTTT|\nintron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|\nTranscript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-131_622-124dup|||||||\nrs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||\n|||||||||0.808|-0.105490,TTTTTTTTT|intron_variant&non_coding_transcript_variant|\nMODIFIER|LINC00636|ENSG00000240423|Transcript|ENST00000649048.1|lncRNA||2/3|\nENST00000649048.1:n.179+5223_179+5231dup|||||||rs35564779||1||HGNC|HGNC:27702|||||||||\n|||Ensembl||||||||||||||||||||||||||||||||||||||||||||0.792|-0.109757, (etc.)\n")),(0,i.kt)("p",null,"Originally Nirvana used the same VCF notation as VEP uses above. The problem is that you end up with a large amount of text that is difficult to parse out by eye and requires the use of several delimiters to divide the information into useful segments. When we originally annotated this variant using VEP, ",(0,i.kt)("strong",{parentName:"p"},"this single variant used 488,909 bytes")," (almost \xbd MB). 
Surprisingly, we found that this broke some downstream tools that had preconceived notions of how long a single line could be in a VCF file."),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Whitespace is not allowed in the VCF INFO field. This means that if you wanted to express a gene description from OMIM: ",(0,i.kt)("strong",{parentName:"p"},'"HRAS PROTOONCOGENE, GTPase; HRAS"'),", you would need to replace the spaces with something else like an underline. You would also need to hope that the VCF parser correctly handles embedded commas and semicolons in the description."))),(0,i.kt)("h3",{id:"what-do-other-annotators-use"},"What do other annotators use?"),(0,i.kt)("p",null,"Unfortunately, file format standardization has not made it all the way to variant annotation yet. The ",(0,i.kt)("a",{parentName:"p",href:"https://ga4gh-gks.github.io/variant_annotation.html"},"GA4GH Annotation group")," had many discussions on the topic several years ago. While a set of JSON schemas were created in that effort, there wasn't enough momentum to make this a new standard."),(0,i.kt)("p",null,"While there is some overlap in general file formats (JSON vs VCF vs TSV), none of those are compatible with each other. I.e. 
the VCF representation in VEP and snpEff is different just like the JSON schemas used by VEP, Nirvana, and GA4GH are different."),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Source"),(0,i.kt)("th",{parentName:"tr",align:null},"Formats"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"VEP"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"),", TSV, VCF")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"snpEff"),(0,i.kt)("td",{parentName:"tr",align:null},"VCF")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"Annovar"),(0,i.kt)("td",{parentName:"tr",align:null},"TSV")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"Nirvana"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"GA4GH"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"))))),(0,i.kt)("p",null,"We are interested in working together with others in the annotation space to develop a common annotation file format. Our belief is that this would accelerate methods development and benchmarking activities within annotation much in the same way the creation of SAM/BAM & VCF/BCF accelerated secondary analysis development."),(0,i.kt)("h3",{id:"what-do-we-gain-by-using-json"},"What do we gain by using JSON?"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"JSON files are better at showing hierarchical and other relational data. For example when we output ClinVar data, we often want to output several overlapping RCV entries (variants coupled with a disease phenotype). In each, we would want to output a list of phenotypes, clinical significance, etc. 
That is difficult to accomplish in a human-readable way using VCF files (without resorting to growing lexicon of delimiters)."),(0,i.kt)("li",{parentName:"ul"},"JSON files use JavaScript data types, while VCF INFO fields don't directly have data types. Instead, external metadata located in the VCF header is required to indicated the preferred data type."),(0,i.kt)("li",{parentName:"ul"},"JSON files are more verbose. Often this is seen as a negative, but compression largely compensates for this. Given the following excerpt from the VCF example above ",(0,i.kt)("inlineCode",{parentName:"li"},"HGNC:27184|||5|||||||||Ensembl")," it's not immediately obvious what the ",(0,i.kt)("inlineCode",{parentName:"li"},"5")," refers to (without checking the VCF header for details). With JSON files, you would always see a key name associated with a value."),(0,i.kt)("li",{parentName:"ul"},"JSON files can be natively imported into different search and analytics solutions like Elasticsearch and Snowflake."),(0,i.kt)("li",{parentName:"ul"},"JSON strings do not have any limitations on the use of whitespace.")),(0,i.kt)("h2",{id:"parsing-json"},"Parsing JSON"),(0,i.kt)("p",null,"Our JSON files are organized similarly to original VCF variants:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(10886).Z})),(0,i.kt)("p",null,"Nirvana JSON files can get very large and sometimes we receive feedback that a bioinformatician tried to read the JSON file into Python or R resulting in a program that ran out of available RAM. 
This happens because those parsers try to load everything into memory all at once."),(0,i.kt)("p",null,"To get around those issues, we play some clever tricks with newlines that enables our users to parse our JSON files quickly and efficiently."),(0,i.kt)("h3",{id:"organization"},"Organization"),(0,i.kt)("p",null,"Our JSON file is arranged as follows:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the header section is located on the first line"),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a position (same as a row in a VCF file)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the genes section ",(0,i.kt)("inlineCode",{parentName:"li"},'],"genes":[')))),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a gene",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the end ",(0,i.kt)("inlineCode",{parentName:"li"},"]}"))))),(0,i.kt)("p",null,"Knowing this, you can load each position line as an independent JSON object and extract the information you need. 
"),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Jupyter Notebook")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"To demonstrate this, we have put together a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-python.ipynb"},"Jupyter notebook demonstrating how to do this in Python")," and a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-r.ipynb"},"R version")," as well."))),(0,i.kt)("h3",{id:"jasix"},"JASIX"),(0,i.kt)("p",null,"One of the tools that we really like in the VCF ecosystem is ",(0,i.kt)("a",{parentName:"p",href:"https://dx.doi.org/10.1093%2Fbioinformatics%2Fbtq671"},"tabix"),". Unfortunately, tabix only works for tab-delimited file formats. 
As a result, we created a similar tool for Nirvana JSON files called JASIX."),(0,i.kt)("p",null,"Here's an example of how you might use JASIX:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/Jasix.dll -i dragen.json.gz -q chr1:942450-942455\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the Nirvana JSON path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-q")," argument specifies a genomic range ",(0,i.kt)("em",{parentName:"li"},"(you can use as many of these as you want)"))),(0,i.kt)("p",null,"JASIX also includes additional options for showing the Nirvana header or for extracting different sections (like the genes section)."),(0,i.kt)("p",null,"The output from JASIX is compliant JSON object shown in pretty-printed form:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{"positions":[\n{\n "chromosome": "chr1",\n "position": 942451,\n "refAllele": "T",\n "altAlleles": [\n "C"\n ],\n "quality": 484.23,\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "1p36.33",\n "samples": [\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 21,\n "genotypeQuality": 60,\n "alleleDepths": [\n 0,\n 21\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 32,\n "genotypeQuality": 93,\n "alleleDepths": [\n 0,\n 32\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 36,\n "genotypeQuality": 105,\n "alleleDepths": [\n 0,\n 36\n ]\n }\n ],\n "variants": [\n {\n "vid": "1-942451-T-C",\n "chromosome": "chr1",\n "begin": 942451,\n "end": 942451,\n "refAllele": "T",\n "altAllele": "C",\n "variantType": "SNV",\n "hgvsg": "NC_000001.11:g.942451T>C",\n "phylopScore": -0.1,\n "clinvar": [\n {\n "id": "VCV000836156.1",\n "reviewStatus": "criteria provided, single submitter",\n "significance": 
[\n "uncertain significance"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "lastUpdatedDate": "2020-08-20"\n },\n {\n "id": "RCV001037211.1",\n "variationId": 836156,\n "reviewStatus": "criteria provided, single submitter",\n "alleleOrigins": [\n "germline"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "phenotypes": [\n "not provided"\n ],\n "medGenIds": [\n "CN517202"\n ],\n "significance": [\n "uncertain significance"\n ],\n "lastUpdatedDate": "2020-08-20",\n "pubMedIds": [\n "28492532"\n ]\n }\n ],\n "dbsnp": [\n "rs6672356"\n ],\n "gnomad": {\n "coverage": 25,\n "allAf": 0.999855,\n "allAn": 123742,\n "allAc": 123724,\n "allHc": 61853,\n "afrAf": 0.999416,\n "afrAn": 10278,\n "afrAc": 10272,\n "afrHc": 5133,\n "amrAf": 0.99995,\n "amrAn": 20008,\n "amrAc": 20007,\n "amrHc": 10003,\n "easAf": 1,\n "easAn": 6054,\n "easAc": 6054,\n "easHc": 3027,\n "finAf": 1,\n "finAn": 8696,\n "finAc": 8696,\n "finHc": 4348,\n "nfeAf": 0.999899,\n "nfeAn": 49590,\n "nfeAc": 49585,\n "nfeHc": 24790,\n "asjAf": 1,\n "asjAn": 7208,\n "asjAc": 7208,\n "asjHc": 3604,\n "sasAf": 0.99967,\n "sasAn": 18160,\n "sasAc": 18154,\n "sasHc": 9074,\n "othAf": 1,\n "othAn": 3748,\n "othAc": 3748,\n "othHc": 1874,\n "maleAf": 0.9999,\n "maleAn": 69780,\n "maleAc": 69773,\n "maleHc": 34883,\n "femaleAf": 0.999796,\n "femaleAn": 53962,\n "femaleAc": 53951,\n "femaleHc": 26970,\n "controlsAllAf": 0.999815,\n "controlsAllAn": 48654,\n "controlsAllAc": 48645\n },\n "oneKg": {\n "allAf": 1,\n "afrAf": 1,\n "amrAf": 1,\n "easAf": 1,\n "eurAf": 1,\n "sasAf": 1,\n "allAn": 5008,\n "afrAn": 1322,\n "amrAn": 694,\n "easAn": 1008,\n "eurAn": 1006,\n "sasAn": 978,\n "allAc": 5008,\n "afrAc": 1322,\n "amrAc": 694,\n "easAc": 1008,\n "eurAc": 1006,\n "sasAc": 978\n },\n "primateAI": [\n {\n "hgnc": "SAMD11",\n "scorePercentile": 0.87\n }\n ],\n "revel": {\n "score": 0.145\n },\n "topmed": {\n "allAf": 0.999809,\n "allAn": 125568,\n "allAc": 125544,\n "allHc": 62760\n },\n "transcripts": [\n {\n 
"transcript": "ENST00000420190.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ],\n "proteinId": "ENSP00000411579.2"\n },\n {\n "transcript": "ENST00000342066.7",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000342066.7:c.1027T>C",\n "hgvsp": "ENSP00000342313.3:p.(Trp343Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000342313.3",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000618181.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "732",\n "cdsPos": "652",\n "exons": "7/11",\n "proteinPos": "218",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618181.4:c.652T>C",\n "hgvsp": "ENSP00000480870.1:p.(Trp218Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000480870.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000622503.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1030",\n "exons": "10/14",\n "proteinPos": "344",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000622503.4:c.1030T>C",\n "hgvsp": "ENSP00000482138.1:p.(Trp344Arg)",\n "isCanonical": true,\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482138.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000618323.4",\n "source": "Ensembl",\n "bioType": 
"protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "712",\n "cdsPos": "632",\n "exons": "8/12",\n "proteinPos": "211",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618323.4:c.632T>C",\n "hgvsp": "ENSP00000480678.1:p.(Leu211Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000480678.1",\n "siftScore": 0.03,\n "siftPrediction": "deleterious - low confidence"\n },\n {\n "transcript": "ENST00000616016.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "ccT/ccC",\n "aminoAcids": "P",\n "cdnaPos": "944",\n "cdsPos": "864",\n "exons": "9/13",\n "proteinPos": "288",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "ENST00000616016.4:c.864T>C",\n "hgvsp": "ENST00000616016.4:c.864T>C(p.(Pro288=))",\n "proteinId": "ENSP00000478421.1"\n },\n {\n "transcript": "ENST00000618779.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "921",\n "cdsPos": "841",\n "exons": "9/13",\n "proteinPos": "281",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618779.4:c.841T>C",\n "hgvsp": "ENSP00000484256.1:p.(Trp281Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000484256.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000616125.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "783",\n "cdsPos": "703",\n "exons": "8/12",\n "proteinPos": "235",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000616125.4:c.703T>C",\n "hgvsp": "ENSP00000484643.1:p.(Trp235Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": 
"ENSP00000484643.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000620200.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "427",\n "cdsPos": "347",\n "exons": "5/9",\n "proteinPos": "116",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000620200.4:c.347T>C",\n "hgvsp": "ENSP00000484820.1:p.(Leu116Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000484820.1",\n "siftScore": 0.16,\n "siftPrediction": "tolerated - low confidence"\n },\n {\n "transcript": "ENST00000617307.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "867",\n "cdsPos": "787",\n "exons": "9/13",\n "proteinPos": "263",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000617307.4:c.787T>C",\n "hgvsp": "ENSP00000482090.1:p.(Trp263Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482090.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "NM_152486.2",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "codons": "Cgg/Cgg",\n "aminoAcids": "R",\n "cdnaPos": "1107",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "148398",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "NM_152486.2:c.1027T>C",\n "hgvsp": "NM_152486.2:c.1027T>C(p.(Arg343=))",\n "isCanonical": true,\n "proteinId": "NP_689699.2"\n },\n {\n "transcript": "ENST00000341065.8",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "750",\n "cdsPos": "751",\n "exons": "8/12",\n "proteinPos": "251",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": 
"ENST00000341065.8:c.750T>C",\n "hgvsp": "ENSP00000349216.4:p.(Trp251Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000349216.4",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000455979.1",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "507",\n "cdsPos": "508",\n "exons": "4/7",\n "proteinPos": "170",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000455979.1:c.507T>C",\n "hgvsp": "ENSP00000412228.1:p.(Trp170Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000412228.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000478729.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000474461.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "389",\n "exons": "3/4",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000474461.1:n.389T>C"\n },\n {\n "transcript": "ENST00000466827.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "191",\n "exons": "2/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000466827.1:n.191T>C"\n },\n {\n "transcript": "ENST00000464948.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "286",\n "exons": "1/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000464948.1:n.286T>C"\n },\n {\n "transcript": "NM_015658.3",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "geneId": "26155",\n "hgnc": "NOC2L",\n 
"consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "NP_056473.2"\n },\n {\n "transcript": "ENST00000483767.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000327044.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000317992.6"\n },\n {\n "transcript": "ENST00000477976.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000496938.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n }\n ]\n }\n ]\n}\n]}\n')))}p.isMDXComponent=!0},10886:(n,e,t)=>{t.d(e,{Z:()=>a});const a=t.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/ad17aed4.9dab17cc.js b/assets/js/ad17aed4.9dab17cc.js deleted file mode 100644 index 8b300390..00000000 --- a/assets/js/ad17aed4.9dab17cc.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7910,6132],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},p=function(e){var t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=m(n),h=i,u=d["".concat(s,".").concat(h)]||d[h]||c[h]||o;return n?a.createElement(u,r(r({ref:t},p),{},{components:n})):a.createElement(u,r({ref:t},p))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var o=n.length,r=new Array(o);r[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:i,r[1]=l;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>d,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const o={},r=void 0,l={unversionedId:"data-sources/omim-json",id:"version-3.16/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/omim-json.md",tags:[],version:"3.16",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],m={toc:s},p="wrapper";function 
d(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to 
susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene name")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry 
below"))))),(0,i.kt)("h4",{id:"phenotype"},"Phenotype"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#inheritance"},"possible values below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,i.kt)("h4",{id:"mapping"},"Mapping"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("li",{parentName:"ol"},"disease phenotype itself was 
mapped"),(0,i.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,i.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,i.kt)("h4",{id:"inheritance"},"Inheritance"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,i.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,i.kt)("h4",{id:"comments"},"Comments"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,i.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,i.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}d.isMDXComponent=!0},14508:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>c,frontMatter:()=>r,metadata:()=>s,toc:()=>m});var a=n(87462),i=(n(67294),n(3905)),o=n(55654);const r={title:"OMIM"},l=void 0,s={unversionedId:"data-sources/omim",id:"version-3.16/data-sources/omim",title:"OMIM",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/omim.mdx",sourceDirName:"data-sources",slug:"/data-sources/omim",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/omim",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/omim.mdx",tags:[],version:"3.16",frontMatter:{title:"OMIM"},sidebar:"version-3.16/docs",previous:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mitomap"},next:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/phylop"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Parse OMIM data",id:"parse-omim-data",children:[{value:"mim2gene.txt",id:"mim2genetxt",children:[],level:3},{value:"OMIM API",id:"omim-api",children:[{value:"Mapping key to content",id:"mapping-key-to-content",children:[],level:4},{value:"Phenotype character to 
comment",id:"phenotype-character-to-comment",children:[],level:4}],level:3},{value:"Remove links in OMIM descriptions",id:"remove-links-in-omim-descriptions",children:[],level:3}],level:2},{value:"JSON output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[],level:2}],p={toc:m},d="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publications")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/30445645/"},"30445645"),"."),(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM\xae), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. 
PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/25428349/"},"25428349"),"."))),(0,i.kt)("h2",{id:"parse-omim-data"},"Parse OMIM data"),(0,i.kt)("p",null,"Nirvana uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Nirvana. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to a Nirvana gene symbol are further processed. The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols."),(0,i.kt)("h3",{id:"mim2genetxt"},"mim2gene.txt"),(0,i.kt)("p",null,"This mim2gene.txt (",(0,i.kt)("a",{parentName:"p",href:"http://omim.org/static/omim/data/mim2gene.txt"},"http://omim.org/static/omim/data/mim2gene.txt"),") file provides the mapping between MIM numbers and gene symbols. An example of this file is given below:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"# MIM Number MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq) Entrez Gene ID (NCBI) Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)\n100050 predominantly phenotypes\n100070 phenotype 100329167\n100100 phenotype\n100200 predominantly phenotypes\n100300 phenotype\n100500 moved/removed\n100600 phenotype\n100640 gene 216 ALDH1A1 ENSG00000165092\n100650 gene/phenotype 217 ALDH2 ENSG00000111275\n100660 gene 218 ALDH3A1 ENSG00000108602\n100670 gene 219 ALDH1B1 ENSG00000137124\n100675 predominantly phenotypes\n100678 gene 39 ACAT2 ENSG00000120437\n")),(0,i.kt)("p",null,'The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns are used to find the proper gene symbol supported by Nirvana, which may or may not be the same as the gene symbol listed here.'),(0,i.kt)("h3",{id:"omim-api"},"OMIM API"),(0,i.kt)("p",null,"Nirvana retrieves the OMIM annotations from the 
",(0,i.kt)("a",{parentName:"p",href:"https://www.omim.org/api"},"OMIM API"),' JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. A sample JSON response from the API is provided there.'),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "omim": {\n "version": "1.0",\n "entryList": [\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 100640,\n "status": "live",\n "titles": {\n "preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",\n "alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\\nACETALDEHYDE DEHYDROGENASE 1;;\\nALDH, LIVER CYTOSOLIC;;\\nRETINAL DEHYDROGENASE 1; RALDH1"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. 
ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 7709,\n "chromosome": 9,\n "chromosomeSymbol": "9",\n "chromosomeSort": 225,\n "chromosomeLocationStart": 72900670,\n "chromosomeLocationEnd": 72953052,\n "transcript": "ENST00000297785.7",\n "cytoLocation": "9q21",\n "computedCytoLocation": "9q21.13",\n "mimNumber": 100640,\n "geneSymbols": "ALDH1A1",\n "geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",\n "mappingMethod": "REa, A",\n "confidence": "P",\n "mouseGeneSymbol": "Aldh1a1",\n "mouseMgiID": "MGI:1353450",\n "geneInheritance": null\n },\n "externalLinks": {\n "geneIDs": "216",\n "hgncID": "402",\n "ensemblIDs": "ENSG00000165092,ENST00000297785.8",\n "approvedGeneSymbols": "ALDH1A1",\n "ncbiReferenceSequences": "1519246465",\n "proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",\n "uniGenes": "Hs.76392",\n "swissProtIDs": "P00352",\n "decipherGene": false,\n "umlsIDs": "C1412333",\n "gtr": true,\n "cmgGene": false,\n "keggPathways": true,\n "gwasCatalog": false,\n\n }\n }\n },\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 102560,\n "status": "live",\n "titles": {\n "preferredTitle": "ACTIN, GAMMA-1; ACTG1",\n "alternativeTitles": "ACTIN, GAMMA; ACTG;;\\nCYTOSKELETAL GAMMA-ACTIN;;\\nACTIN, CYTOPLASMIC, 2"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. 
Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 13666,\n "chromosome": 17,\n "chromosomeSymbol": "17",\n "chromosomeSort": 947,\n "chromosomeLocationStart": 81509970,\n "chromosomeLocationEnd": 81512798,\n "transcript": "ENST00000331925.7",\n "cytoLocation": "17q25.3",\n "computedCytoLocation": "17q25.3",\n "mimNumber": 102560,\n "geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",\n "geneName": "Actin, gamma-1",\n "mappingMethod": "REa, A, Fd",\n "confidence": "C",\n "mouseGeneSymbol": "Actg1",\n "mouseMgiID": "MGI:87906",\n "geneInheritance": null,\n "phenotypeMapList": [\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Baraitser-Winter syndrome 2",\n "phenotypeMimNumber": 614583,\n "phenotypicSeriesNumber": "PS243310",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n },\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Deafness, autosomal dominant 20/26",\n "phenotypeMimNumber": 604717,\n "phenotypicSeriesNumber": "PS124900",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n }\n ]\n }\n }\n }\n ]\n }\n}\n')),(0,i.kt)("p",null,"Content from the OMIM API JSON response is reorganized as shown in the Nirvana ",(0,i.kt)("a",{parentName:"p",href:"#json-output"},"JSON Output")),(0,i.kt)("p",null,"Mappings between the Nirvana JSON output and OMIM JSON API are listed in the table below:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Nirvana JSON key chain"),(0,i.kt)("th",{parentName:"tr",align:"left"},"OMIM API JSON key 
chain"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:geneName")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (",(0,i.kt)("a",{parentName:"td",href:"#mapping-key-to-content"},"see mapping 
below"),")")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:inheritances"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (",(0,i.kt)("a",{parentName:"td",href:"#phenotype-character-to-comment"},"see mapping below"),")")))),(0,i.kt)("h4",{id:"mapping-key-to-content"},"Mapping key to content"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"1")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"2")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disease phenotype itself was mapped"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"3")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"molecular basis of the disorder is known"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"4")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder is a chromosome deletion or duplication syndrome"),(0,i.kt)("br",null)),(0,i.kt)("h4",{id:"phenotype-character-to-comment"},"Phenotype character to comment"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"?")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"unconfirmed or possibly spurious mapping"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"["),"/",(0,i.kt)("inlineCode",{parentName:"p"},"]")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"nondiseases"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"{"),"/",(0,i.kt)("inlineCode",{parentName:"p"},"}")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"contribute to susceptibility to multifactorial disorders or to susceptibility to 
infection"),(0,i.kt)("br",null)),(0,i.kt)("h3",{id:"remove-links-in-omim-descriptions"},"Remove links in OMIM descriptions"),(0,i.kt)("p",null,"There are different types of link in the OMIM description section. For example, in above JSON response, we have the description of MIM entry 100640:"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).")),(0,i.kt)("p",null,"As the descriptions will be shown as plain text, we remove the curry brackets surrounding links and try to make the text still readable with minimal modifications. Briefly:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},'Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.'),(0,i.kt)("li",{parentName:"ul"},'Links referring to a literature reference will be processed to remove the internal index and curry brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".'),(0,i.kt)("li",{parentName:"ul"},'All the other links will simple have their curry brackets removed. 
For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".'),(0,i.kt)("li",{parentName:"ul"},'If the content within a pair of parentheses becomes empty after being processed, the parentheses need to be removed as well and its surrounding white spaces should be properly processed. For example, "ALDH2 ({100650})," will become "ALDH2,".')),(0,i.kt)("p",null,"Here is a list of examples about how the description section supposed to be processed:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Original text"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Processed text"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"({516030}, {516040}, and {516050})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1, {168461}; D2, {123833}; D3, {123834})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1; D2; D3)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2, {125645})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., see {102700}, {300755})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH). 
See also liver mitochondrial ALDH2")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A; {601011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1; {138359}), mu (e.g., {138350})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1), mu")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB; see {164011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G, {147574})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; {EC 2.7.1.74}; {125450})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; EC 2.7.1.74)")))),(0,i.kt)("h2",{id:"json-output"},"JSON output"),(0,i.kt)(o.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The first step in builing the OMIM ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," files is to use the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"downloadOMIM")," to download the necessary data. In order to download the data the user must possess an API key obtained from OMIM. 
This key has to be set as the environment variable ",(0,i.kt)("em",{parentName:"p"},"OmimApiKey"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"export OmimApiKey=\ndotnet NirvanaBuild/SAUtils.dll downloadOMIM\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll downloadomim [options]\nDownload the OMIM gene annotation data\n\nOPTIONS:\n --uga, -u universal gene archive path\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet NirvanaBuild/SAUtils.dll downloadOMIM --ref References/7/Homo_sapiens.GRCh38.Nirvana.dat --uga Cache/27/UGA.tsv.gz --out ExternalDataSources/OMIM/2021-06-14\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\nUnable to resolve gene symbol conflict for CD300H: Ensembl: [ENSG00000284690]: AC079325.2, Entrez Gene: [100130520]: LOC100130520\nUnable to resolve gene symbol conflict for STRIT1: Ensembl: [ENSG00000240045]: DWORF, Entrez Gene: [100507537]: LOC100507537\nUnable to resolve gene symbol conflict for WAKMAR2: Ensembl: [ENSG00000237499]: AL357060.2, Entrez Gene: [100130476]: LOC100130476\nUnable to resolve gene symbol conflict for PERCC1: Ensembl: [ENSG00000284395]: AL032819.3, Entrez Gene: [105371045]: LOC105371045\nUnable to resolve gene symbol conflict for LASTR: Ensembl: [ENSG00000242147]: AL365356.5, Entrez Gene: [105376382]: LOC105376382\nUnable to resolve gene symbol conflict for PRANCR: Ensembl: [ENSG00000257815]: LINC01481, Entrez Gene: [101928062]: LOC101928062\nUnable to resolve gene symbol 
conflict for THORLNC: Ensembl: [ENSG00000226856]: AC093901.1, Entrez Gene: [100506797]: LOC100506797\nGene Symbol Update Statistics\n============================================\n# of gene symbols already up-to-date: 15,952\n# of gene symbols updated: 330\n# of genes where both IDs are null: 0\n# of gene symbols not in cache: 148\n# of resolved gene symbol conflicts: 15\n# of unresolved gene symbol conflicts: 7\n\nTime: 00:02:38.2\n")),(0,i.kt)("p",null,"Once the download has succeeded, the ",(0,i.kt)("inlineCode",{parentName:"p"},"nga")," files can be produced using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"omim"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet NirvanaBuild/SAUtils.dll omim\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll omim [options]\nCreates a gene annotation database from OMIM data\n\nOPTIONS:\n --m2g, -m MimToGeneSymbol tsv file\n --json, -j OMIM entry json file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\n\ndotnet NirvanaBuild/SAUtils.dll omim --m2g ExternalDataSources/OMIM/2021-06-14/MimToGeneSymbol.tsv --json ExternalDataSources/OMIM/2021-06-14/MimEntries.json.gz --out SupplementaryDatabase/63/\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\n\nTime: 00:00:04.5\n")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/adb80b82.6cda6bce.js b/assets/js/adb80b82.6cda6bce.js deleted file mode 100644 index 5e5af88e..00000000 --- 
a/assets/js/adb80b82.6cda6bce.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3425],{3905:(t,n,e)=>{e.d(n,{Zo:()=>m,kt:()=>s});var a=e(67294);function r(t,n,e){return n in t?Object.defineProperty(t,n,{value:e,enumerable:!0,configurable:!0,writable:!0}):t[n]=e,t}function l(t,n){var e=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable}))),e.push.apply(e,a)}return e}function o(t){for(var n=1;n=0||(r[e]=t[e]);return r}(t,n);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,e)&&(r[e]=t[e])}return r}var p=a.createContext({}),u=function(t){var n=a.useContext(p),e=n;return t&&(e="function"==typeof t?t(n):o(o({},n),t)),e},m=function(t){var n=u(t.components);return a.createElement(p.Provider,{value:n},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var n=t.children;return a.createElement(a.Fragment,{},n)}},N=a.forwardRef((function(t,n){var e=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(e),N=r,s=d["".concat(p,".").concat(N)]||d[N]||g[N]||l;return e?a.createElement(s,o(o({ref:n},m),{},{components:e})):a.createElement(s,o({ref:n},m))}));function s(t,n){var e=arguments,r=n&&n.mdxType;if("string"==typeof t||r){var l=e.length,o=new Array(l);o[0]=N;var i={};for(var p in n)hasOwnProperty.call(n,p)&&(i[p]=n[p]);i.originalType=t,i[d]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{e.r(n),e.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=e(87462),r=(e(67294),e(3905));const l={},o=void 0,i={unversionedId:"data-sources/gnomad-exomes-small-variants-json",id:"version-3.2.5/data-sources/gnomad-exomes-small-variants-json",title:"gnomad-exomes-small-variants-json",description:"| Field 
| Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/gnomad-exomes-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-exomes-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/gnomad-exomes-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/gnomad-exomes-small-variants-json.md",tags:[],version:"3.2.5",frontMatter:{}},p=[],u={toc:p},m="wrapper";function d(t){let{components:n,...e}=t;return(0,r.kt)(m,(0,a.Z)({},u,e,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomadExome":{ \n "coverage":20,\n "allAf":0.190317,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coverage"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer 
values)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/add60472.8a2d153f.js b/assets/js/add60472.8a2d153f.js deleted file mode 100644 index d8b71b88..00000000 --- a/assets/js/add60472.8a2d153f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7605,5096,882],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>c});var a=n(67294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,s=e.parentName,m=i(e,["components","mdxType","originalType","parentName"]),u=p(n),g=l,c=u["".concat(s,".").concat(g)]||u[g]||d[g]||r;return n?a.createElement(c,o(o({ref:t},m),{},{components:n})):a.createElement(c,o({ref:t},m))}));function c(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var 
r=n.length,o=new Array(r);o[0]=g;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[u]="string"==typeof e?e:l,o[1]=i;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>i,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.16/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/gnomad-lof-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],p={toc:s},m="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 
0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}u.isMDXComponent=!0},87602:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>i,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.16/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.16/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],p={toc:s},m="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n 
"failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. 
Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}u.isMDXComponent=!0},46566:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>g,frontMatter:()=>i,metadata:()=>p,toc:()=>m});var a=n(87462),l=(n(67294),n(3905)),r=n(87602),o=n(81633);const i={title:"gnomAD"},s=void 0,p={unversionedId:"data-sources/gnomad",id:"version-3.16/data-sources/gnomad",title:"gnomAD",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/gnomad.mdx",sourceDirName:"data-sources",slug:"/data-sources/gnomad",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/gnomad",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/gnomad.mdx",tags:[],version:"3.16",frontMatter:{title:"gnomAD"},sidebar:"version-3.16/docs",previous:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/fusioncatcher"},next:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mito-heteroplasmy"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF extraction",id:"vcf-extraction",children:[],level:3},{value:"Computation",id:"computation",children:[],level:3},{value:"Merging genomes and exomes",id:"merging-genomes-and-exomes",children:[],level:3},{value:"Filters",id:"filters",children:[],level:3},{value:"VCF download 
instructions",id:"vcf-download-instructions",children:[],level:3},{value:"JSON output",id:"json-output",children:[],level:3}],level:2},{value:"LoF Gene Metrics",id:"lof-gene-metrics",children:[{value:"Tab delimited file example",id:"tab-delimited-file-example",children:[],level:3},{value:"JSON key to TSV column mapping",id:"json-key-to-tsv-column-mapping",children:[],level:3},{value:"Gene symbol update",id:"gene-symbol-update",children:[],level:3},{value:"Conflict resolution",id:"conflict-resolution",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON output",id:"json-output-1",children:[],level:3}],level:2}],u={toc:m},d="wrapper";function g(e){let{components:t,...n}=e;return(0,l.kt)(d,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"The Genome Aggregation Database (",(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/"},"gnomAD"),") is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Koch, L., 2020. 
Exploring human genomic diversity with gnomAD. ",(0,l.kt)("em",{parentName:"p"},"Nature Reviews Genetics"),", ",(0,l.kt)("strong",{parentName:"p"},"21(8)"),", pp.448-448."))),(0,l.kt)("h2",{id:"small-variants"},"Small Variants"),(0,l.kt)("h3",{id:"vcf-extraction"},"VCF extraction"),(0,l.kt)("p",null,"We currently extract the following info fields from gnomAD genome and exome VCF files:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("p",null,"We also extract the following extra fields from gnomAD exome VCF file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("h3",{id:"computation"},"Computation"),(0,l.kt)("p",null,"Using these, we compute the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Coverage"),(0,l.kt)("li",{parentName:"ul"},"Allele count, Homozygous count, allele number and allele frequencies for:",(0,l.kt)("ul",{parentName:"li"},(0,l.kt)("li",{parentName:"ul"},"Global population"),(0,l.kt)("li",{parentName:"ul"},"African/African Americans"),(0,l.kt)("li",{parentName:"ul"},"Admixed Americans"),(0,l.kt)("li",{parentName:"ul"},"Ashkenazi Jews"),(0,l.kt)("li",{parentName:"ul"},"East Asians"),(0,l.kt)("li",{parentName:"ul"},"Finnish"),(0,l.kt)("li",{parentName:"ul"},"Non-Finnish Europeans"),(0,l.kt)("li",{parentName:"ul"},"South Asian"),(0,l.kt)("li",{parentName:"ul"},"Others (population not assigned)"),(0,l.kt)("li",{parentName:"ul"},"Male"),(0,l.kt)("li",{parentName:"ul"},"Female"),(0,l.kt)("li",{parentName:"ul"},"Controls")))),(0,l.kt)("div",{className:"admonition admonition-tip alert 
alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Coverage = DP / AN. Frequencies are computed using AC/AN for each population."),(0,l.kt)("li",{parentName:"ul"},"Please note that currently there is no genome sequencing data of south asian (SAS) population available in gnomAD."),(0,l.kt)("li",{parentName:"ul"},"Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.")))),(0,l.kt)("h3",{id:"merging-genomes-and-exomes"},"Merging genomes and exomes"),(0,l.kt)("p",null,"When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 
5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"For GRCh37, Nirvana currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output."),(0,l.kt)("li",{parentName:"ul"},"For GRCh38, Nirvana currently uses gnomAD version 3.0 which doesn't contain the exomes data. Therefore, only genomes data are presented in the output.")))),(0,l.kt)("h3",{id:"filters"},"Filters"),(0,l.kt)("p",null,"The following strategy will be used when there's a conflict in filter status:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"center"}),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes PASS")),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes Filtered")))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes PASS")),(0,l.kt)("td",{parentName:"tr",align:"center"},"PASS"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use exome data")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes Filtered")),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use genome data"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Filtered")))),(0,l.kt)("h3",{id:"vcf-download-instructions"},"VCF download instructions"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/downloads"},"https://gnomad.broadinstitute.org/downloads")),(0,l.kt)("h3",{id:"json-output"},"JSON 
output"),(0,l.kt)(r.default,{mdxType:"JSONV"}),(0,l.kt)("h2",{id:"lof-gene-metrics"},"LoF Gene Metrics"),(0,l.kt)("h3",{id:"tab-delimited-file-example"},"Tab delimited file example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_zmis_z lof_z oe_lof_upper_rank oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfep_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof exac_exp_lof exac_oe_lof brain_expression chromosome start_positionend_position\nMED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643\n")),(0,l.kt)("h3",{id:"json-key-to-tsv-column-mapping"},"JSON key to TSV column 
mapping"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"JSON key"),(0,l.kt)("th",{parentName:"tr",align:null},"TSV column"),(0,l.kt)("th",{parentName:"tr",align:null},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"pLI"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"syn_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"mis_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"oe_lof_upper"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))),(0,l.kt)("h3",{id:"gene-symbol-update"},"Gene symbol update"),(0,l.kt)("p",null,"The input file provides Ensembl 
gene ids for each entry. We observed that they were unique while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene Ids are more stable, and Nirvana transcript cache data contains Ensembl gene ids, we use these ids to extract the gene symbols from the transcript cache. For example, if ENSG0001 has gene symbol GENE1 in the input but Nirvana cache say ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry."),(0,l.kt)("h3",{id:"conflict-resolution"},"Conflict resolution"),(0,l.kt)("p",null,"gnomAD uses Ensembl GeneID as unique identifiers in the ",(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"source file")," but Nirvana uses HGNC gene symbols. Multiple Ensembl GeneIDs can map to the same HGNC symbol and therefore may result is conflict."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"MDGA2 ENST00000426342 306 4.0043e+02 7.6419e-01 2.1096e-05 4724 78 1.6525e+02 4.7202e-01 1923 125 1.3737e+02 9.0993e-01 7.1973e-06 1413 4 2.0926e-06 453 3.8316e+01 9.9922e-01 8.6490e-12 7.8128e-04 1.0440e-01 7.8600e-01 1.0560e+00 6.9500e-01 8.4000e-01 5.0000e-02 2.3900e-01 8.2988e-01 1.6769e+00 5.1372e+00 1529 0 0 7 2.8103e-05 4.0317e-06 124784 7 0 124791 2.8047e-05 9.8167e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5391e-05 1.6672e-04 3.2680e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5308e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000139915 2 2181 13 protein_coding 835332 9.9322e-01 3 2.7833e+01 1.0779e-01 NA 14 47308826 48144157\nMDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 
3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 47311134 48143999\n")),(0,l.kt)("p",null,'In such cases, Nirvana chooses the entry with the smallest "LOEUF" value. The reason for choosing this value can be highlighted by the following table:'),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"right"},"LOEUF decile"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Haplo-insufficient"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Dominant"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Recessive"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Olfactory Genes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"0-10%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"104"),(0,l.kt)("td",{parentName:"tr",align:"right"},"140"),(0,l.kt)("td",{parentName:"tr",align:"right"},"36"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"10-20%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"47"),(0,l.kt)("td",{parentName:"tr",align:"right"},"128"),(0,l.kt)("td",{parentName:"tr",align:"right"},"72"),(0,l.kt)("td",{parentName:"tr",align:"right"},"1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"20-30%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"17"),(0,l.kt)("td",{parentName:"tr",align:"right"},"86"),(0,l.kt)("td",{parentName:"tr",align:"right"},"112"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"30-40%"),(0,l.kt)("td",{parentName:"
tr",align:"right"},"8"),(0,l.kt)("td",{parentName:"tr",align:"right"},"80"),(0,l.kt)("td",{parentName:"tr",align:"right"},"173"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"40-50%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"7"),(0,l.kt)("td",{parentName:"tr",align:"right"},"65"),(0,l.kt)("td",{parentName:"tr",align:"right"},"206"),(0,l.kt)("td",{parentName:"tr",align:"right"},"8")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"50-60%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4"),(0,l.kt)("td",{parentName:"tr",align:"right"},"54"),(0,l.kt)("td",{parentName:"tr",align:"right"},"207"),(0,l.kt)("td",{parentName:"tr",align:"right"},"6")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"60-70%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"46"),(0,l.kt)("td",{parentName:"tr",align:"right"},"154"),(0,l.kt)("td",{parentName:"tr",align:"right"},"18")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"70-80%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"2"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49"),(0,l.kt)("td",{parentName:"tr",align:"right"},"120"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"80-90%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"34"),(0,l.kt)("td",{parentName:"tr",align:"right"},"58"),(0,l.kt)("td",{parentName:"tr",align:"right"},"96")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"90-100%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"26"),(0,l.kt)("td",{parentName:"tr",align:"right"},"40"),(0,l.kt)("td",{parentName:"tr",align:"right"},"174")))),(0,l.kt)("div",{cla
ssName:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Table source: ",(0,l.kt)("a",{parentName:"li",href:"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf"},"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf")),(0,l.kt)("li",{parentName:"ul"},"This table indicates that lower LOEUF scores have more deleterious effect on genes."),(0,l.kt)("li",{parentName:"ul"},"Only 15 out of 19685 genes have conflicting entries.")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"List of genes with conflicting entries")),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'MDGA2:\n {"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}\n {"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}\nCRYBG3:\n {"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}\n {"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}\nCHTF8:\n {"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}\n {"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}\nSEPT1:\n 
{"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}\n {"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}\nARL14EPL:\n {"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}\n {"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}\nUGT2A1:\n {"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}\n {"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}\nLTB4R2:\n {"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}\n {"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}\nCDRT1:\n {"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}\n {"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}\nMUC3A:\n {"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}\n {"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}\nCOG8:\n {"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}\n {"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}\nAC006486.1:\n {"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}\n {"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}\nAL645922.1:\n {"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}\n {"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}\nNBPF20:\n {"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}\n {"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}\nPRAMEF11:\n 
{"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}\n {"synZ":-3.33e0,"misZ":-2.59e0}\nFAM231D:\n {"synZ":-1.98e0,"misZ":-1.44e0}\n {"synZ":1.07e0,"misZ":3.13e-1}\n')),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Conflict resolution")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Pick the entry with the lowest LOEUF score"),(0,l.kt)("li",{parentName:"ul"},"If the same, pick the lowest pLI"),(0,l.kt)("li",{parentName:"ul"},"Otherwise pick the entry with the max absolute value of synZ + misZ")),(0,l.kt)("h3",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz")),(0,l.kt)("h3",{id:"json-output-1"},"JSON output"),(0,l.kt)(o.default,{mdxType:"JSONG"}))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/ae221e74.f28b058a.js b/assets/js/ae221e74.f28b058a.js deleted file mode 100644 index 34ae8870..00000000 --- a/assets/js/ae221e74.f28b058a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2137],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>k});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var o=a.createContext({}),s=function(t){var 
e=a.useContext(o),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=s(t.components);return a.createElement(o.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,o=t.parentName,m=p(t,["components","mdxType","originalType","parentName"]),c=s(n),u=r,k=c["".concat(o,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(k,i(i({ref:e},m),{},{components:n})):a.createElement(k,i({ref:e},m))}));function k(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,i=new Array(l);i[0]=u;var p={};for(var o in e)hasOwnProperty.call(e,o)&&(p[o]=e[o]);p.originalType=t,p[c]="string"==typeof t?t:r,i[1]=p;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>l,metadata:()=>p,toc:()=>o});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,p={unversionedId:"data-sources/clinvar-json",id:"version-3.16/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clinvar-json.md",tags:[],version:"3.16",frontMatter:{}},o=[],s={toc:o},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n 
"id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely 
pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/aeaf6b3c.201f3276.js b/assets/js/aeaf6b3c.201f3276.js deleted file mode 100644 index 86b6bdeb..00000000 --- a/assets/js/aeaf6b3c.201f3276.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5671,8762],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),p=d(n),u=r,v=p["".concat(s,".").concat(u)]||p[u]||m[u]||o;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,i[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/topmed-json",id:"version-3.21/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/topmed-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],d={toc:s},c="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}p.isMDXComponent=!0},21794:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),r=(n(67294),n(3905)),o=n(60216);const i={title:"TOPMed"},l=void 
0,s={unversionedId:"data-sources/topmed",id:"version-3.21/data-sources/topmed",title:"TOPMed",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/topmed.mdx",sourceDirName:"data-sources",slug:"/data-sources/topmed",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/topmed",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/topmed.mdx",tags:[],version:"3.21",frontMatter:{title:"TOPMed"},sidebar:"docs",previous:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/splice-ai"},next:{title:"Nirvana JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/file-formats/nirvana-json-file-format"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF extraction",id:"vcf-extraction",children:[],level:2},{value:"GRCh37 liftover",id:"grch37-liftover",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON output",id:"json-output",children:[],level:2}],c={toc:d},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"The ",(0,r.kt)("a",{parentName:"p",href:"https://www.nhlbi.nih.gov/science/trans-omics-precision-medicine-topmed-program"},"Trans-Omics for Precision Medicine")," (TOPMed) program, sponsored by the National Institutes of Health (NIH) National Heart, Lung and Blood Institute (NHLBI), is part of a broader Precision Medicine Initiative, which aims to provide disease treatments tailored to an individual\u2019s unique genes and environment. 
TOPMed contributes to this Initiative through the integration of whole-genome sequencing (WGS) and other omics (e.g., metabolic profiles, epigenomics, protein and RNA expression patterns) data with molecular, behavioral, imaging, environmental, and clinical data."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Kowalski, M.H., Qian, H., Hou, Z., Rosen, J.D., Tapia, A.L., Shan, Y., Jain, D., Argos, M., Arnett, D.K., Avery, C. and Barnes, K.C., 2019. Use of> 100,000 NHLBI Trans-Omics for Precision Medicine (TOPMed) Consortium whole genome sequences improves imputation quality and detection of rare variant associations in admixed African and Hispanic/Latino populations. 
",(0,r.kt)("em",{parentName:"p"},"PLoS genetics"),", ",(0,r.kt)("strong",{parentName:"p"},"15(12)"),", p.e1008500."))),(0,r.kt)("h2",{id:"vcf-extraction"},"VCF extraction"),(0,r.kt)("p",null,"We currently extract the following fields from TOPMed VCF file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,r.kt)("p",null,"Example:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 10132 TOPMed_freeze_5?chr1:10,132 T C 255 SVM VRT=1;NS=62784;AN=125568;AC=32;AF=0.000254842;Het=32;Hom=0 NA:FRQ 125568:0.000254842\n")),(0,r.kt)("h2",{id:"grch37-liftover"},"GRCh37 liftover"),(0,r.kt)("p",null,"The data is not available for GRCh37 on TOPMed website. We performed a liftover from GRCh38 to GRCh37 using dbSNP ids."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://bravo.sph.umich.edu/freeze5/hg38/download"},"https://bravo.sph.umich.edu/freeze5/hg38/download")),(0,r.kt)("h2",{id:"json-output"},"JSON output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/af17000b.cbf469f1.js b/assets/js/af17000b.cbf469f1.js deleted file mode 100644 index 39bdbb82..00000000 --- a/assets/js/af17000b.cbf469f1.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7598,4408],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),m=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},p=function(e){var t=m(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),d=m(n),h=i,u=d["".concat(s,".").concat(h)]||d[h]||c[h]||o;return n?a.createElement(u,r(r({ref:t},p),{},{components:n})):a.createElement(u,r({ref:t},p))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var o=n.length,r=new Array(o);r[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:i,r[1]=l;for(var m=2;m{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>d,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const o={},r=void 0,l={unversionedId:"data-sources/omim-json",id:"version-3.21/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/omim-json.md",tags:[],version:"3.21",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],m={toc:s},p="wrapper";function 
d(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to 
susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene name")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry 
below"))))),(0,i.kt)("h4",{id:"phenotype"},"Phenotype"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#inheritance"},"possible values below"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see ",(0,i.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,i.kt)("h4",{id:"mapping"},"Mapping"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("li",{parentName:"ol"},"disease phenotype itself was 
mapped"),(0,i.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,i.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,i.kt)("h4",{id:"inheritance"},"Inheritance"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,i.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,i.kt)("h4",{id:"comments"},"Comments"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,i.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,i.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}d.isMDXComponent=!0},99227:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>c,frontMatter:()=>r,metadata:()=>s,toc:()=>m});var a=n(87462),i=(n(67294),n(3905)),o=n(95878);const r={title:"OMIM"},l=void 0,s={unversionedId:"data-sources/omim",id:"version-3.21/data-sources/omim",title:"OMIM",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/omim.mdx",sourceDirName:"data-sources",slug:"/data-sources/omim",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/omim",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/omim.mdx",tags:[],version:"3.21",frontMatter:{title:"OMIM"},sidebar:"docs",previous:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/mitomap"},next:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/phylop"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Parse OMIM data",id:"parse-omim-data",children:[{value:"mim2gene.txt",id:"mim2genetxt",children:[],level:3},{value:"OMIM API",id:"omim-api",children:[{value:"Mapping key to content",id:"mapping-key-to-content",children:[],level:4},{value:"Phenotype character to 
comment",id:"phenotype-character-to-comment",children:[],level:4}],level:3},{value:"Remove links in OMIM descriptions",id:"remove-links-in-omim-descriptions",children:[],level:3}],level:2},{value:"JSON output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[],level:2}],p={toc:m},d="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"OMIM is a comprehensive, authoritative compendium of human genes and genetic phenotypes that is freely available and updated daily."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publications")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Scott AF, Hamosh A. OMIM.org: leveraging knowledge across phenotype-gene relationships. Nucleic Acids Res. 2019 Jan 8;47(D1):D1038-D1043. doi:10.1093/nar/gky1151. PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/30445645/"},"30445645"),"."),(0,i.kt)("p",{parentName:"div"},"Amberger JS, Bocchini CA, Schiettecatte FJM, Scott AF, Hamosh A. OMIM.org: Online Mendelian Inheritance in Man (OMIM\xae), an online catalog of human genes and genetic disorders. Nucleic Acids Res. 2015 Jan;43(Database issue):D789-98. 
PMID: ",(0,i.kt)("a",{parentName:"p",href:"https://pubmed.ncbi.nlm.nih.gov/25428349/"},"25428349"),"."))),(0,i.kt)("h2",{id:"parse-omim-data"},"Parse OMIM data"),(0,i.kt)("p",null,"Nirvana uses gene symbols as the gene identifiers internally. To generate the OMIM database, we first map the MIM numbers, which are the primary identifiers used by OMIM, to gene symbols supported by Nirvana. Please note that there can be multiple MIM numbers mapped to one gene symbol. Only MIM numbers successfully mapped to a Nirvana gene symbol are further processed. The OMIM API is used to fetch all the information associated with a gene MIM number, except the gene symbols."),(0,i.kt)("h3",{id:"mim2genetxt"},"mim2gene.txt"),(0,i.kt)("p",null,"This mim2gene.txt (",(0,i.kt)("a",{parentName:"p",href:"http://omim.org/static/omim/data/mim2gene.txt"},"http://omim.org/static/omim/data/mim2gene.txt"),") file provides the mapping between MIM numbers and gene symbols. An example of this file is given below:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"# MIM Number MIM Entry Type (see FAQ 1.3 at https://omim.org/help/faq) Entrez Gene ID (NCBI) Approved Gene Symbol (HGNC) Ensembl Gene ID (Ensembl)\n100050 predominantly phenotypes\n100070 phenotype 100329167\n100100 phenotype\n100200 predominantly phenotypes\n100300 phenotype\n100500 moved/removed\n100600 phenotype\n100640 gene 216 ALDH1A1 ENSG00000165092\n100650 gene/phenotype 217 ALDH2 ENSG00000111275\n100660 gene 218 ALDH3A1 ENSG00000108602\n100670 gene 219 ALDH1B1 ENSG00000137124\n100675 predominantly phenotypes\n100678 gene 39 ACAT2 ENSG00000120437\n")),(0,i.kt)("p",null,'The information in the "Entrez Gene ID (NCBI)", "Approved Gene Symbol (HGNC)" and "Ensembl Gene ID (Ensembl)" columns are used to find the proper gene symbol supported by Nirvana, which may or may not be the same as the gene symbol listed here.'),(0,i.kt)("h3",{id:"omim-api"},"OMIM API"),(0,i.kt)("p",null,"Nirvana retrieves the OMIM annotations from the 
",(0,i.kt)("a",{parentName:"p",href:"https://www.omim.org/api"},"OMIM API"),' JSON responses. The "entry" handler is used to fetch all the annotations associated with a given OMIM gene. A sample JSON response from the API is provided there.'),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "omim": {\n "version": "1.0",\n "entryList": [\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 100640,\n "status": "live",\n "titles": {\n "preferredTitle": "ALDEHYDE DEHYDROGENASE 1 FAMILY, MEMBER A1; ALDH1A1",\n "alternativeTitles": "ALDEHYDE DEHYDROGENASE 1; ALDH1;;\\nACETALDEHYDE DEHYDROGENASE 1;;\\nALDH, LIVER CYTOSOLIC;;\\nRETINAL DEHYDROGENASE 1; RALDH1"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. 
ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 7709,\n "chromosome": 9,\n "chromosomeSymbol": "9",\n "chromosomeSort": 225,\n "chromosomeLocationStart": 72900670,\n "chromosomeLocationEnd": 72953052,\n "transcript": "ENST00000297785.7",\n "cytoLocation": "9q21",\n "computedCytoLocation": "9q21.13",\n "mimNumber": 100640,\n "geneSymbols": "ALDH1A1",\n "geneName": "Aldehyde dehydrogenase-1 family, member A1, soluble",\n "mappingMethod": "REa, A",\n "confidence": "P",\n "mouseGeneSymbol": "Aldh1a1",\n "mouseMgiID": "MGI:1353450",\n "geneInheritance": null\n },\n "externalLinks": {\n "geneIDs": "216",\n "hgncID": "402",\n "ensemblIDs": "ENSG00000165092,ENST00000297785.8",\n "approvedGeneSymbols": "ALDH1A1",\n "ncbiReferenceSequences": "1519246465",\n "proteinSequences": "194378740,211947843,2183299,178400,119582947,119582948,178372,40807656,194375548,30582681,209402710,4262707,194739599,4261625,178394,261487497,16306661,21361176,32815082,118495,62089228",\n "uniGenes": "Hs.76392",\n "swissProtIDs": "P00352",\n "decipherGene": false,\n "umlsIDs": "C1412333",\n "gtr": true,\n "cmgGene": false,\n "keggPathways": true,\n "gwasCatalog": false,\n\n }\n }\n },\n {\n "entry": {\n "prefix": "*",\n "mimNumber": 102560,\n "status": "live",\n "titles": {\n "preferredTitle": "ACTIN, GAMMA-1; ACTG1",\n "alternativeTitles": "ACTIN, GAMMA; ACTG;;\\nCYTOSKELETAL GAMMA-ACTIN;;\\nACTIN, CYTOPLASMIC, 2"\n },\n "textSectionList": [\n {\n "textSection": {\n "textSectionName": "description",\n "textSectionTitle": "Description",\n "textSectionContent": "Actins are a family of highly conserved cytoskeletal proteins that play fundamental roles in nearly all aspects of eukaryotic cell biology. The ability of a cell to divide, move, endocytose, generate contractile force, and maintain shape is reliant upon functional actin-based structures. 
Actin isoforms are grouped according to expression patterns: muscle actins predominate in striated and smooth muscle (e.g., ACTA1, {102610}, and ACTA2, {102620}, respectively), whereas the 2 cytoplasmic nonmuscle actins, gamma-actin (ACTG1) and beta-actin (ACTB; {102630}), are found in all cells ({13:Sonnemann et al., 2006})."\n }\n }\n ],\n "geneMap": {\n "sequenceID": 13666,\n "chromosome": 17,\n "chromosomeSymbol": "17",\n "chromosomeSort": 947,\n "chromosomeLocationStart": 81509970,\n "chromosomeLocationEnd": 81512798,\n "transcript": "ENST00000331925.7",\n "cytoLocation": "17q25.3",\n "computedCytoLocation": "17q25.3",\n "mimNumber": 102560,\n "geneSymbols": "ACTG1, DFNA20, DFNA26, BRWS2",\n "geneName": "Actin, gamma-1",\n "mappingMethod": "REa, A, Fd",\n "confidence": "C",\n "mouseGeneSymbol": "Actg1",\n "mouseMgiID": "MGI:87906",\n "geneInheritance": null,\n "phenotypeMapList": [\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Baraitser-Winter syndrome 2",\n "phenotypeMimNumber": 614583,\n "phenotypicSeriesNumber": "PS243310",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n },\n {\n "phenotypeMap": {\n "mimNumber": 102560,\n "phenotype": "Deafness, autosomal dominant 20/26",\n "phenotypeMimNumber": 604717,\n "phenotypicSeriesNumber": "PS124900",\n "phenotypeMappingKey": 3,\n "phenotypeInheritance": "Autosomal dominant"\n }\n }\n ]\n }\n }\n }\n ]\n }\n}\n')),(0,i.kt)("p",null,"Content from the OMIM API JSON response is reorganized as shown in the Nirvana ",(0,i.kt)("a",{parentName:"p",href:"#json-output"},"JSON Output")),(0,i.kt)("p",null,"Mappings between the Nirvana JSON output and OMIM JSON API are listed in the table below:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Nirvana JSON key chain"),(0,i.kt)("th",{parentName:"tr",align:"left"},"OMIM API JSON key 
chain"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:geneName"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:geneName")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mimNumber"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:mimNumber")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:phenotype"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:description"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:textSectionList:textSection:textSectionContent")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:mapping"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeMappingKey (",(0,i.kt)("a",{parentName:"td",href:"#mapping-key-to-content"},"see mapping 
below"),")")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:inheritances"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotypeInheritance")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:phenotypes:comments"),(0,i.kt)("td",{parentName:"tr",align:"left"},"omim:entryList:entry:geneMap:phenotypeMapList:phenotypeMap:phenotype (",(0,i.kt)("a",{parentName:"td",href:"#phenotype-character-to-comment"},"see mapping below"),")")))),(0,i.kt)("h4",{id:"mapping-key-to-content"},"Mapping key to content"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"1")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder was positioned by mapping of the wild type gene"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"2")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disease phenotype itself was mapped"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"3")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"molecular basis of the disorder is known"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"4")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"disorder is a chromosome deletion or duplication syndrome"),(0,i.kt)("br",null)),(0,i.kt)("h4",{id:"phenotype-character-to-comment"},"Phenotype character to comment"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"?")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"unconfirmed or possibly spurious mapping"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"["),"/",(0,i.kt)("inlineCode",{parentName:"p"},"]")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"nondiseases"),(0,i.kt)("br",null),"\n",(0,i.kt)("inlineCode",{parentName:"p"},"{"),"/",(0,i.kt)("inlineCode",{parentName:"p"},"}")," to ",(0,i.kt)("inlineCode",{parentName:"p"},"contribute to susceptibility to multifactorial disorders or to susceptibility to 
infection"),(0,i.kt)("br",null)),(0,i.kt)("h3",{id:"remove-links-in-omim-descriptions"},"Remove links in OMIM descriptions"),(0,i.kt)("p",null,"There are different types of link in the OMIM description section. For example, in above JSON response, we have the description of MIM entry 100640:"),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"The ALDH1A1 gene encodes a liver cytosolic isoform of acetaldehyde dehydrogenase ({EC 1.2.1.3}), an enzyme involved in the major pathway of alcohol metabolism after alcohol dehydrogenase (ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650}), variation in which has been implicated in different responses to alcohol ingestion.\\n\\nALDH1 is associated with a low Km for NAD, a high Km for acetaldehyde, and is strongly inactivated by disulfiram. ALDH2 is associated with a high Km for NAD, and low Km for acetaldehyde, and is insensitive to inhibition by disulfiram ({4:Hsu et al., 1985}).")),(0,i.kt)("p",null,"As the descriptions will be shown as plain text, we remove the curry brackets surrounding links and try to make the text still readable with minimal modifications. Briefly:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},'Links referring to another MIM entry (e.g. {100650}) will be removed. Any word(s) specifically associated with the removed link will also be removed. For example, "(ADH, see {103700})" will become "(ADH)" after the process.'),(0,i.kt)("li",{parentName:"ul"},'Links referring to a literature reference will be processed to remove the internal index and curry brackets. For example, "{4:Hsu et al., 1985}" becomes "Hsu et al., 1985".'),(0,i.kt)("li",{parentName:"ul"},'All the other links will simple have their curry brackets removed. 
For example, "{EC 1.2.1.3}" becomes "EC 1.2.1.3".'),(0,i.kt)("li",{parentName:"ul"},'If the content within a pair of parentheses becomes empty after being processed, the parentheses need to be removed as well and its surrounding white spaces should be properly processed. For example, "ALDH2 ({100650})," will become "ALDH2,".')),(0,i.kt)("p",null,"Here is a list of examples about how the description section supposed to be processed:"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Original text"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Processed text"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"({516030}, {516040}, and {516050})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1, {168461}; D2, {123833}; D3, {123834})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., D1; D2; D3)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2, {125645})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(desmocollins; see DSC2)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., see {102700}, {300755})"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH, see {103700}). See also liver mitochondrial ALDH2 ({100650})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(ADH). 
See also liver mitochondrial ALDH2")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A; {601011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see, e.g., CACNA1A)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1; {138359}), mu (e.g., {138350})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(e.g., GSTA1), mu")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB; see {164011})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(NFKB)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G, {147574})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(see ISGF3G)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; {EC 2.7.1.74}; {125450})"),(0,i.kt)("td",{parentName:"tr",align:"left"},"(DCK; EC 2.7.1.74)")))),(0,i.kt)("h2",{id:"json-output"},"JSON output"),(0,i.kt)(o.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The first step in builing the OMIM ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," files is to use the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"downloadOMIM")," to download the necessary data. In order to download the data the user must possess an API key obtained from OMIM. 
This key has to be set as the environment variable ",(0,i.kt)("em",{parentName:"p"},"OmimApiKey"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},'export OmimApiKey=\ndotnet NirvanaBuild/SAUtils.dll downloadOMIM\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll downloadomim [options]\nDownload the OMIM gene annotation data\n\nOPTIONS:\n --cache, -c \n input cache directory\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet NirvanaBuild/SAUtils.dll downloadOMIM --ref References/7/Homo_sapiens.GRCh38.Nirvana.dat --uga Cache/ --out ExternalDataSources/OMIM/2021-06-14\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nGene Symbol Update Statistics\n============================================\n{\n "NumGeneSymbolsUpToDate": 16788,\n "NumGeneSymbolsUpdated": 95,\n "NumGenesWhereBothIdsAreNull": 0,\n "NumGeneSymbolsNotInCache": 106,\n "NumResolvedGeneSymbolConflicts": 15,\n "NumUnresolvedGeneSymbolConflicts": 0\n}\n\nTime: 00:04:08.9\n')),(0,i.kt)("p",null,"Once the download has succeeded, the ",(0,i.kt)("inlineCode",{parentName:"p"},"nga")," files can be produced using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's subcommand ",(0,i.kt)("inlineCode",{parentName:"p"},"omim"),"."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet NirvanaBuild/SAUtils.dll omim\n---------------------------------------------------------------------------\nSAUtils 
(c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll omim [options]\nCreates a gene annotation database from OMIM data\n\nOPTIONS:\n --m2g, -m MimToGeneSymbol tsv file\n --json, -j OMIM entry json file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\n\ndotnet NirvanaBuild/SAUtils.dll omim --m2g ExternalDataSources/OMIM/2021-06-14/MimToGeneSymbol.tsv --json ExternalDataSources/OMIM/2021-06-14/MimEntries.json.gz --out SupplementaryDatabase/63/\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\n\nTime: 00:00:04.5\n")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/b08f6f21.309be900.js b/assets/js/b08f6f21.309be900.js deleted file mode 100644 index ab239995..00000000 --- a/assets/js/b08f6f21.309be900.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4124,4592],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>g});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var o=a.createContext({}),p=function(e){var t=a.useContext(o),n=t;return 
e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},c=function(e){var t=p(e.components);return a.createElement(o.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,o=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),m=p(n),u=i,g=m["".concat(o,".").concat(u)]||m[u]||d[u]||r;return n?a.createElement(g,l(l({ref:t},c),{},{components:n})):a.createElement(g,l({ref:t},c))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=u;var s={};for(var o in t)hasOwnProperty.call(t,o)&&(s[o]=t[o]);s.originalType=e,s[m]="string"==typeof e?e:i,l[1]=s;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>r,metadata:()=>s,toc:()=>o});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,s={unversionedId:"data-sources/clinvar-json",id:"version-3.17/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clinvar-json.md",tags:[],version:"3.17",frontMatter:{}},o=[],p={toc:o},c="wrapper";function m(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n 
"variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no assertion provided"),(0,i.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,i.kt)("li",{parentName:"ul"},"practice guideline"),(0,i.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"unknown"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"germline"),(0,i.kt)("li",{parentName:"ul"},"somatic"),(0,i.kt)("li",{parentName:"ul"},"inherited"),(0,i.kt)("li",{parentName:"ul"},"paternal"),(0,i.kt)("li",{parentName:"ul"},"maternal"),(0,i.kt)("li",{parentName:"ul"},"de-novo"),(0,i.kt)("li",{parentName:"ul"},"biparental"),(0,i.kt)("li",{parentName:"ul"},"uniparental"),(0,i.kt)("li",{parentName:"ul"},"not-tested"),(0,i.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"uncertain significance"),(0,i.kt)("li",{parentName:"ul"},"not provided"),(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely 
pathogenic"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"drug response"),(0,i.kt)("li",{parentName:"ul"},"histocompatibility"),(0,i.kt)("li",{parentName:"ul"},"association"),(0,i.kt)("li",{parentName:"ul"},"risk factor"),(0,i.kt)("li",{parentName:"ul"},"protective"),(0,i.kt)("li",{parentName:"ul"},"affects"),(0,i.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,i.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}m.isMDXComponent=!0},43949:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>d,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(87462),i=(n(67294),n(3905)),r=n(69487);const l={title:"ClinVar"},s=void 0,o={unversionedId:"data-sources/clinvar",id:"version-3.17/data-sources/clinvar",title:"ClinVar",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/clinvar.mdx",sourceDirName:"data-sources",slug:"/data-sources/clinvar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clinvar",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clinvar.mdx",tags:[],version:"3.17",frontMatter:{title:"ClinVar"},sidebar:"version-3.17/docs",previous:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen"},next:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/cosmic"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"RCV File",id:"rcv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Parsing Significance",id:"parsing-significance",children:[],level:4}],level:3}],level:2},{value:"VCV 
File",id:"vcv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[{value:"Source data files",id:"source-data-files",children:[],level:3}],level:2}],c={toc:p},m="wrapper";function d(e){let{components:t,...l}=e;return(0,i.kt)(m,(0,a.Z)({},c,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, 
George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", ",(0,i.kt)("strong",{parentName:"p"},"46"),", Issue D1, 4 January 2018, Pages D1062\u2013D1067, ",(0,i.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/nar/gkx1153"},"https://doi.org/10.1093/nar/gkx1153")))),(0,i.kt)("h2",{id:"rcv-file"},"RCV File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{target:"_blank",href:n(86647).Z},"a full RCV entry"),"."),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ID")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3}","{3}":!0},'\n \n \n\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"LastUpdatedDate")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},'\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{5}","{5}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ReviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Phenotypes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2-8}","{2-8}":!0},'\n \n \n \n Joubert syndrome 9\n \n \n \n\n')),(0,i.kt)("p",null,'We only use the 
field with Type="Preferred". Multiple phenotypes may be reported'),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Location and Variant Id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,5-12}","{3,5-12}":!0},'\n\n \n \n \n \n \n \n \n\n')),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"The variant position is extracted from the fields for their respective assemblies."),(0,i.kt)("li",{parentName:"ul"},"Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant."),(0,i.kt)("li",{parentName:"ul"},'For older records, since "start\' and "stop" fields are not always available, we use the "display_start" and "display_end" fields.'),(0,i.kt)("li",{parentName:"ul"},"If a required allele is not available, we extract it from the reference sequence."),(0,i.kt)("li",{parentName:"ul"},"Only variants having a dbSNP id are extracted."),(0,i.kt)("li",{parentName:"ul"},"Note that a ClinVar accession may have multiple variants associated with it (possible in different locations)"),(0,i.kt)("li",{parentName:"ul"},"VariantId is extracted from the MeasureSet attributes.")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"MedGen, OMIM, Orphanet IDs")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4-7}","{4-7}":!0},'\n \n \n \n \n \n \n \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"AlleleOrigins")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},"\n germline\n\n")),(0,i.kt)("p",null,"We only extract all Allele Origins from Submissions (SCV) entries."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"PubMedIds")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4,10,16,21}","{4,10,16,21}":!0},'\n \n \n 12114475\n \n \n \n LMM Criteria\n \n 24033266\n \n \n \n \n \n 9113933\n \n 
\n \n \n 23757202\n \n\n')),(0,i.kt)("p",null,"We only extract all Pubmed Ids from Submissions (SCV) entries."),(0,i.kt)("h4",{id:"parsing-significance"},"Parsing Significance"),(0,i.kt)("p",null,"Extracting significance(s) may involve parsing multiple fields. Take the following snippets into consideration."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,8,13-14}","{3,8,13-14}":!0},'\n no assertion criteria provided\n Pathogenic\n\n\n\n criteria provided, multiple submitters, no conflicts\n Pathogenic/Likely pathogenic\n\n\n\n no assertion criteria provided\n Conflicting interpretations of pathogenicity\n Pathogenic(1);Uncertain significance(1)\n\n')),(0,i.kt)("p",null,"Given the evidence, we converted the significance field into an array of strings which may be parsed out of the ",(0,i.kt)("inlineCode",{parentName:"p"},"Descriptions")," or ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," fields."),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Varying Delimiters")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The delimiters in each field may vary. Currently, the delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Description")," are ",(0,i.kt)("inlineCode",{parentName:"p"},",")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),". 
The delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," are ",(0,i.kt)("inlineCode",{parentName:"p"},";")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),"."))),(0,i.kt)("h2",{id:"vcv-file"},"VCV File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n\n\n current\n Homo sapiens\n \n \n \n \n \n 1p36.31\n \n \n \n 601142\n \n \n \n 1p36.31\n \n \n \n 607215\n \n \n GRCh37/hg19 1p36.31(chr1:6051187-6158763)\n copy number gain\n \n 1p36.31\n \n \n \n no interpretation for the single variant\n \n \n \n \n \n \n no interpretation for the single variant\n \n \n no interpretation for the single variant\n \n \n \n \n \n \n \n \n \n\n\n')),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{7}","{7}":!0},'\n \n \n \n \n \n no interpretation for the single variant\n \n \n \n \n \n\n')),(0,i.kt)("p",null,"May have multiple significances listed."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},"\n \n \n no interpretation for the single variant\n \n \n\n")),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"The XML file contains ~1k more entries (out of 162K) than the VCF file"),(0,i.kt)("li",{parentName:"ul"},"The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF"),(0,i.kt)("li",{parentName:"ul"},'The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H",\netc.) 
as their alternate allele')))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz"},"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz")),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz"},"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz")),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The ClinVar ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," for Nirvana can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"clinvar")," subcommand."),(0,i.kt)("h3",{id:"source-data-files"},"Source data files"),(0,i.kt)("p",null,"Two input ",(0,i.kt)("inlineCode",{parentName:"p"},".xml")," files and a ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file are required in order to build the ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," file. 
You should have the following files:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"ClinVarFullRelease_2021-06.xml.gz ClinVarVariationRelease_2021-06.xml.gz\nClinVarFullRelease_2021-06.xml.gz.version\n")),(0,i.kt)("p",null,"The version file is a text file with the follwoing format."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinVar\nVERSION=20210603\nDATE=2021-06-03\nDESCRIPTION=A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence\n")),(0,i.kt)("p",null,"The help menu for the utility is as follows:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll clinvar\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll clinvar [options]\nCreates a supplementary database with ClinVar annotations\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --rcv, -i ClinVar Full release XML file\n --vcv, -c ClinVar Variation release XML file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll clinvar\n")),(0,i.kt)("p",null,"Here is a sample execution:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet ~/development/Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll clinvar \\\\\n--ref ~/development/References/7/Homo_sapiens.GRCh38.Nirvana.dat --rcv ClinVarFullRelease_2021-06.xml.gz \\\\\n--vcv ClinVarVariationRelease_2021-06.xml.gz --out ~/development/SupplementaryDatabase/63/GRCh38\n---------------------------------------------------------------------------\nSAUtils (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 
3.13.0\n---------------------------------------------------------------------------\n\nFound 983417 VCV records\nChromosome 1 completed in 00:09:46.2\nChromosome 2 completed in 00:00:16.4\nChromosome 3 completed in 00:00:06.9\nUnknown vcv id:982521 found in RCV001262095.1\nChromosome 4 completed in 00:00:03.9\nChromosome 5 completed in 00:00:07.1\nChromosome 6 completed in 00:00:05.7\nChromosome 7 completed in 00:00:06.6\nUnknown vcv id:430873 found in RCV000493222.1\nChromosome 8 completed in 00:00:04.6\nChromosome 9 completed in 00:00:06.2\nChromosome 10 completed in 00:00:05.6\nChromosome 11 completed in 00:00:10.2\nChromosome 12 completed in 00:00:06.9\nChromosome 13 completed in 00:00:05.9\nChromosome 14 completed in 00:00:04.9\nChromosome 15 completed in 00:00:05.4\nChromosome 16 completed in 00:00:08.9\nChromosome 17 completed in 00:00:13.1\nChromosome 18 completed in 00:00:02.4\nChromosome 19 completed in 00:00:07.6\nChromosome 20 completed in 00:00:02.4\nChromosome 21 completed in 00:00:01.6\nChromosome 22 completed in 00:00:02.6\nChromosome MT completed in 00:00:00.3\nChromosome X completed in 00:00:05.5\n2 unknown VCVs found in RCVs.\n982521,430873\nChromosome Y completed in 00:00:00.0\n\nTime: 00:12:08.2\n\n")))}d.isMDXComponent=!0},86647:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/files/clinvar-rcv-example-4e0a2f2ac6c70acd0ce41410690b683b.xml"}}]); \ No newline at end of file diff --git a/assets/js/b2e466e8.3bddd607.js b/assets/js/b2e466e8.3bddd607.js deleted file mode 100644 index 0e1c8750..00000000 --- a/assets/js/b2e466e8.3bddd607.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8577,5160,1912,4899,7942,2508,4648,8462,5702,6192,216,6635,3232,12,9082,4105,6602,1633,2630,829,7870,4246,3805,8633],{3905:(t,e,a)=>{a.d(e,{Zo:()=>m,kt:()=>g});var n=a(67294);function r(t,e,a){return e in 
t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),d=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},m=function(t){var e=d(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",c={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},u=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=o(t,["components","mdxType","originalType","parentName"]),s=d(a),u=r,g=s["".concat(p,".").concat(u)]||s[u]||c[u]||l;return a?n.createElement(g,i(i({ref:e},m),{},{components:a})):n.createElement(g,i({ref:e},m))}));function g(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=u;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[s]="string"==typeof t?t:r,i[1]=o;for(var d=2;d{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-snv-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. Non-zero integer.")))))}s.isMDXComponent=!0},92590:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-sv-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0},99729:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/amino-acid-conservation-json",id:"data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/amino-acid-conservation-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} \n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array of 
doubles"),(0,r.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}s.isMDXComponent=!0},37356:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-dosage-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 
1\n}]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,r.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}s.isMDXComponent=!0},80949:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-gene-validity-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible 
values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted"),(0,r.kt)("li",{parentName:"ul"},"no known disease relationship")))}s.isMDXComponent=!0},44674:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-json",id:"data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n 
"begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. 
Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}s.isMDXComponent=!0},90212:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clinvar-json",id:"data-sources/clinvar-json",title:"clinvar-json",description:"small variants:",source:"@site/docs/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clinvar-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"small variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch 
syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"large variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n "pathogenic"\n ], \n "lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n "significance":[\n "pathogenic"\n ],\n "lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"variant 
type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single 
variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}s.isMDXComponent=!0},11273:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/cosmic-cancer-gene-census",id:"data-sources/cosmic-cancer-gene-census",title:"cosmic-cancer-gene-census",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/cosmic-cancer-gene-census.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-cancer-gene-census",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-cancer-gene-census",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-cancer-gene-census.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},' {\n "name": "PRDM16",\n "hgncId": 14000,\n "ncbiGeneId": "63976",\n "ensemblGeneId": "ENSG00000142611",\n "cosmic": {\n "roleInCancer": [\n "oncogene",\n "fusion"\n ]\n }\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"roleInCancer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Possible roles in caner")))))}s.isMDXComponent=!0},40540:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dann-json",id:"data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dann-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 0.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1.0")))))}s.isMDXComponent=!0},39156:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dbsnp-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n 
"rs1042821"\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}s.isMDXComponent=!0},94072:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/decipher-json",id:"data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/decipher-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed deletions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"total # of 
samples")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap")))))}s.isMDXComponent=!0},65538:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gerp-json",id:"data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gerp-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 1.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to 
+\u221e")))))}s.isMDXComponent=!0},48036:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gme-json",id:"data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gme-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele 
frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}s.isMDXComponent=!0},74859:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-lof-json",id:"data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-lof-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pLi"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 
0.1*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pNull"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pRec"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"synZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"misZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}s.isMDXComponent=!0},73827:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-small-variants-json",id:"data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-small-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coverage"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer 
values)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}s.isMDXComponent=!0},90818:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-json",id:"data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": [\n {\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n "variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n "afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n }\n]\n\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"chromosome 
number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"structural variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"gnomAD ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,r.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,r.kt)("tr",{parentNa
me:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}s.isMDXComponent=!0},88181:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-small-variants-json",id:"data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-small-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}s.isMDXComponent=!0},58898:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-structural-variants-json",id:"data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}s.isMDXComponent=!0},88010:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/omim-json",id:"data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/omim-json.md",tags:[],version:"current",frontMatter:{}},p=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}s.isMDXComponent=!0},20737:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/primate-ai-json",id:"data-sources/primate-ai-json",title:"primate-ai-json",description:"GRCh38",source:"@site/docs/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/primate-ai-json.md",tags:[],version:"current",frontMatter:{}},p=[{value:"GRCh38",id:"grch38",children:[],level:4},{value:"GRCh37",id:"grch37",children:[],level:4}],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h4",{id:"grch38"},"GRCh38"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI-3D": [\n {\n "aminoAcidPosition": 2,\n "refAminoAcid": "V",\n "altAminoAcid": "M",\n "score": 0.616944,\n "scorePercentile": 0.52,\n "ensemblTranscriptId": "ENST00000335137.4",\n "refSeqTranscriptId": "NM_001005484.1"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidPosition"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Amino Acid Position (1-based)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAminoAcid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Reference Amino Acid")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAminoAcid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Alternate Amino Acid")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ensemblTranscriptId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Transcript ID (Ensembl)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refSeqTranscriptId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Transcript ID 
(RefSeq)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))),(0,r.kt)("h4",{id:"grch37"},"GRCh37"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI": [\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC Gene Symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}s.isMDXComponent=!0},60591:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/revel-json",id:"data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/revel-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}s.isMDXComponent=!0},99838:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/splice-ai-json",id:"data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/splice-ai-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}s.isMDXComponent=!0},49819:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/topmed-json",id:"data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/topmed-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Illumina Connected Annotations)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}s.isMDXComponent=!0},50120:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>C,default:()=>x,frontMatter:()=>M,metadata:()=>S,toc:()=>R});var n=a(87462),r=(a(67294),a(3905)),l=a(99729),i=a(90212),o=a(44674),p=a(37356),d=a(80949),m=a(39156),s=a(20737),c=a(60591),u=a(40540),g=a(65538),k=a(99838),N=a(88181),f=a(58898),y=a(73827),h=a(74859),b=a(41888),v=a(92590),A=a(88010),j=a(49819),I=a(90818),D=a(48036),w=a(94072),T=a(11273);const M={title:"Illumina Connected Annotations JSON File Format"},C=void 
0,S={unversionedId:"file-formats/illumina-annotator-json-file-format",id:"file-formats/illumina-annotator-json-file-format",title:"Illumina Connected Annotations JSON File Format",description:"Overview",source:"@site/docs/file-formats/illumina-annotator-json-file-format.mdx",sourceDirName:"file-formats",slug:"/file-formats/illumina-annotator-json-file-format",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/file-formats/illumina-annotator-json-file-format.mdx",tags:[],version:"current",frontMatter:{title:"Illumina Connected Annotations JSON File Format"},sidebar:"docs",previous:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed"},next:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations"}},R=[{value:"Overview",id:"overview",children:[{value:"Conventions",id:"conventions",children:[],level:3},{value:"JSON Layout",id:"json-layout",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Header",id:"header",children:[{value:"Data Source",id:"data-source",children:[],level:4},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:4}],level:2},{value:"Positions",id:"positions",children:[{value:"ClinGen",id:"clingen",children:[],level:3},{value:"1000 Genomes (SV)",id:"1000-genomes-sv",children:[],level:3},{value:"gnomAD (SV)",id:"gnomad-sv",children:[],level:3},{value:"MITOMAP (SV)",id:"mitomap-sv",children:[],level:3}],level:2},{value:"Samples",id:"samples",children:[],level:2},{value:"Variants",id:"variants",children:[{value:"Transcripts",id:"transcripts",children:[{value:"PolyPhen",id:"polyphen",children:[],level:4},{value:"SIFT",id:"sift",children:[],level:4},{value:"Amino Acid Conservation",id:"amino-acid-conservation",children:[],level:4},{value:"Gene 
Fusions",id:"gene-fusions",children:[],level:4},{value:"Fusion",id:"fusion",children:[],level:4},{value:"Cancer Hotspots",id:"cancer-hotspots",children:[],level:4}],level:3},{value:"Regulatory Regions",id:"regulatory-regions",children:[{value:"Regulatory Types",id:"regulatory-types",children:[],level:4},{value:"Regulatory Consequences",id:"regulatory-consequences",children:[],level:4}],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3},{value:"1000 Genomes",id:"1000-genomes",children:[],level:3},{value:"DANN",id:"dann",children:[],level:3},{value:"dbSNP",id:"dbsnp",children:[],level:3},{value:"DECIPHER",id:"decipher",children:[],level:3},{value:"GERP",id:"gerp",children:[],level:3},{value:"GME Variome",id:"gme-variome",children:[],level:3},{value:"gnomAD",id:"gnomad",children:[],level:3},{value:"MITOMAP",id:"mitomap",children:[],level:3},{value:"Primate AI",id:"primate-ai",children:[],level:3},{value:"REVEL",id:"revel",children:[],level:3},{value:"Splice AI",id:"splice-ai",children:[],level:3},{value:"TOPMed",id:"topmed",children:[],level:3}],level:2},{value:"Genes",id:"genes",children:[{value:"OMIM",id:"omim",children:[],level:3},{value:"gnomAD LoF Gene Metrics",id:"gnomad-lof-gene-metrics",children:[],level:3},{value:"ClinGen Disease Validity",id:"clingen-disease-validity",children:[],level:3},{value:"COSMIC Cancer Gene Census",id:"cosmic-cancer-gene-census",children:[],level:3}],level:2}],O={toc:R},F="wrapper";function x(t){let{components:e,...M}=t;return(0,r.kt)(F,(0,n.Z)({},O,M,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("h3",{id:"conventions"},"Conventions"),(0,r.kt)("p",null,"In the Illumina Connected Annotations JSON representation, we try to maximize the amount of useful information that is relayed in the output file. 
As such, we have several conventions that are useful to know about:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display ",(0,r.kt)("inlineCode",{parentName:"li"},'"isStructuralVariant":false')," a few million times when annotating a small variant VCF."),(0,r.kt)("li",{parentName:"ul"},"When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. Illumina Connected Annotations treats periods like empty or null strings and therefore will not output those entries.")),(0,r.kt)("h3",{id:"json-layout"},"JSON Layout"),(0,r.kt)("p",null,(0,r.kt)("img",{src:a(66410).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"In general, each position corresponds to a row in the original VCF file."),(0,r.kt)("p",{parentName:"div"},"For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section."))),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"We've put together a ",(0,r.kt)("a",{parentName:"p",href:"../introduction/parsing-json"},"new section that discusses how to parse our JSON files")," easily using examples in a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/blob/master/static/files/parse-json-python.ipynb"},"Python Jupyter notebook")," and a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/blob/master/static/files/parse-json-r.ipynb"},"R version")," as well. 
In addition, we have information about how to quickly dump content from our JSON file using a tabix-like utility called JASIX."))),(0,r.kt)("h2",{id:"header"},"Header"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"IlluminaConnectedAnnotations 3.0.0-alpha.5+g6c52e247",\n "creationTime":"2017-06-14 15:53:13",\n "genomeAssembly":"GRCh37",\n "dataSources":[\n {\n "name":"OMIM",\n "version":"unknown",\n "description":"An Online Catalog of Human Genes and Genetic Disorders",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"VEP",\n "version":"84",\n "description":"BothRefSeqAndEnsembl",\n "releaseDate":"2017-01-16"\n },\n {\n "name":"ClinVar",\n "version":"20170503",\n "description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"phyloP",\n "version":"hg19",\n "description":"46 way conservation score between humans and 45 other vertebrates",\n "releaseDate":"2009-11-10"\n }\n ],\n "samples":[\n "NA12878",\n "NA12891",\n "NA12892"\n ]\n },\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotator"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the name of the annotator and the current version")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"creationTime"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd 
hh:mm:ss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genomeAssembly"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#genome-assemblies"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"schemaVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"incremented whenever the core structure of the JSON file introduces breaking changes")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#data-source"},"Data Source entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"samples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the order of these sample names will be used throughout the JSON file when enumerating samples")))),(0,r.kt)("h4",{id:"data-source"},"Data 
Source"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"version"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"optional description of the data source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"releaseDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")))),(0,r.kt)("h4",{id:"genome-assemblies"},"Genome Assemblies"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"GRCh37"),(0,r.kt)("li",{parentName:"ul"},"GRCh38"),(0,r.kt)("li",{parentName:"ul"},"hg19"),(0,r.kt)("li",{parentName:"ul"},"SARSCoV2")),(0,r.kt)("h2",{id:"positions"},"Positions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"positions":[\n {\n "chromosome":"chr2",\n "position":48010488,\n "repeatUnit":"GGCCCC",\n "refRepeatCount":3,\n "svEnd":48020488,\n "refAllele":"G",\n "altAlleles":[\n "A",\n "GT"\n ],\n "quality":461,\n "filters":[\n "PASS"\n ],\n "ciPos":[\n -170,\n 170\n ],\n "ciEnd":[\n -175,\n 175\n ],\n "svLength":1000,\n "strandBias":1.23,\n "jointSomaticNormalQuality":29,\n 
"cytogeneticBand":"2p16.3",\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Variant Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"position"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (1-based notation). Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnit"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refRepeatCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by 
ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"quality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (Normally an integer, but some variant callers using floating point. 
Has been observed as high as 500k)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"filters"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svLength"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"strandBias"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"small variant"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by GATK (from SB)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"jointSomaticNormalQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by the Manta variant caller (SOMATICSCORE)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cytogeneticBand"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"e.g. 
17p13.1")))),(0,r.kt)("h3",{id:"clingen"},"ClinGen"),(0,r.kt)(o.default,{mdxType:"ClinGen"}),(0,r.kt)(p.default,{mdxType:"ClinGenDosage"}),(0,r.kt)("h3",{id:"1000-genomes-sv"},"1000 Genomes (SV)"),(0,r.kt)(v.default,{mdxType:"ThousandGenomesSV"}),(0,r.kt)("h3",{id:"gnomad-sv"},"gnomAD (SV)"),(0,r.kt)(I.default,{mdxType:"GnomadSV"}),(0,r.kt)("h3",{id:"mitomap-sv"},"MITOMAP (SV)"),(0,r.kt)(f.default,{mdxType:"MitoMapSV"}),(0,r.kt)("h2",{id:"samples"},"Samples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n "totalDepth":57,\n "genotypeQuality":12,\n "copyNumber":3,\n "repeatUnitCounts":[\n 10,\n 20\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "failedFilter":true,\n "splitReadCounts":[\n 10,\n 20\n ],\n "pairedEndReadCounts":[\n 10,\n 20\n ],\n "isDeNovo":true,\n "diseaseAffectedStatuses":[\n "-"\n ],\n "artifactAdjustedQualityScore":89.3,\n "likelihoodRatioQualityScore":78.2,\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"VCF"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotype"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GT"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantFrequencies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"VF, AD"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"totalDepth"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DP"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotypeQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values. Typically maxes out at 99")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"copyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"minorHaplotypeCopyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"MCN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnitCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"REPCN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleDepths"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AD"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer 
values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"FT"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"splitReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pairedEndReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"PR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDeNovo"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DN"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"deNovoQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DQ"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseaseAffectedStatuses"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DST"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"artifactAdjustedQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. 
Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"likelihoodRatioQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"LQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lossOfHeterozygosity"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CN, MCN"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"somaticQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SQ"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"VF"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 100. 2 decimal places. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"binCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"BC"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Empty Samples")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"If a sample does not contain any entries, we will create a sample object that contains the ",(0,r.kt)("inlineCode",{parentName:"p"},"isEmpty")," key. 
This ensures that sample ordering is preserved while indicating that a sample is intentionally empty."),(0,r.kt)("pre",{parentName:"div"},(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "isEmpty":true\n }\n],\n')))),(0,r.kt)("h2",{id:"variants"},"Variants"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "isReferenceMinorAllele":true,\n "isStructuralVariant":true,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "isRecomposedVariant":true,\n "linkedVids":["2:48010488:GTA:ATC"],\n "hgvsg":"NC_000002.11:g.48010488G>A",\n "phylopScore":0.459\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"vid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"Variant Identifiers"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReferenceMinorAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a reference minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isStructuralVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a structural variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inLowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant lies in a low complexity region (gnomAD low complexity regions)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the reference allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the alternate allele.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"uses\xa0",(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"Sequence Ontology sequence 
alterations"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the decomposed variant has been used to create another recomposed variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isRecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is recomposed from two or more decomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"linkedVids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"list of ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"VIDs")," for variants connecting decomposed and recomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsg"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS g. notation")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phyloP conservation score. 
Range: -14.08 to 6.424")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Reference Minor Alleles")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Illumina Connected Annotations supports annotating reference minor alleles. In such a case, ",(0,r.kt)("inlineCode",{parentName:"p"},"refAllele")," will be replaced by the global major allele and ",(0,r.kt)("inlineCode",{parentName:"p"},"altAllele")," will be replaced with the original reference allele."))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Flagging Decomposed & Recomposed Variants")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with 
",(0,r.kt)("inlineCode",{parentName:"p"},'"isDecomposedVariant":true'),"."),(0,r.kt)("p",{parentName:"div"},"Similarly, the recomposed variant will be shown as a new VCF position. This recomposed variant will be flagged with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isRecomposedVariant":true'),"."))),(0,r.kt)("h3",{id:"transcripts"},"Transcripts"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"transcripts":[\n {\n "transcript":"ENST00000445503.1",\n "source":"Ensembl",\n "bioType":"nonsense_mediated_decay",\n "codons":"gGg/gAg",\n "aminoAcids":"G/E",\n "cdnaPos":"268",\n "cdsPos":"116",\n "exons":"1/9",\n "introns":"1/8",\n "proteinPos":"39",\n "geneId":"ENSG00000116062",\n "hgnc":"MSH6",\n "consequence":[\n "missense_variant",\n "NMD_transcript_variant"\n ],\n "hgvsc":"ENST00000445503.1:c.116G>A",\n "hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",\n "geneFusion":{\n "exon":6,\n "intron":5,\n "fusions":[\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",\n "exon":3,\n "intron":2\n },\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",\n "exon":2,\n "intron":1\n }\n ]\n },\n "isCanonical":true,\n "polyPhenScore":0.95,\n "polyPhenPrediction":"probably damaging",\n "proteinId":"ENSP00000405294.1",\n "siftScore":0.61,\n "siftPrediction":"tolerated",\n "completeOverlap":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript ID. e.g. 
ENST00000445503.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"source"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"RefSeq / Ensembl")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,r.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"codons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdnaPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdsPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exons affected by the variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"introns"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"introns affected by the 
variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/obob.cgi"},"Sequence Ontology Consequences"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS protein nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneFusion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#gene-fusions"},"Gene Fusions entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isCanonical"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a canonical 
transcript")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isManeSelect"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a MANE select transcript")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#polyphen"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"protein ID. E.g. ENSP00000405294.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#sift"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"completeOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this transcript is completely overlapped by the variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cancerHotspots"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see 
",(0,r.kt)("a",{parentName:"td",href:"#cancer-hotspots"},"Cancer Hotspots entry below"))))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"MANE Select")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"MANE select tags are only available for RefSeq transcripts on GRCh38."))),(0,r.kt)("h4",{id:"polyphen"},"PolyPhen"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"probably damaging"),(0,r.kt)("li",{parentName:"ul"},"possibly damaging"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"unknown")),(0,r.kt)("h4",{id:"sift"},"SIFT"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"tolerated"),(0,r.kt)("li",{parentName:"ul"},"deleterious"),(0,r.kt)("li",{parentName:"ul"},"tolerated - low confidence"),(0,r.kt)("li",{parentName:"ul"},"deleterious - low confidence")),(0,r.kt)("h4",{id:"amino-acid-conservation"},"Amino Acid Conservation"),(0,r.kt)(l.default,{mdxType:"AminoAcidConservation"}),(0,r.kt)("h4",{id:"gene-fusions"},"Gene 
Fusions"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"fusions"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#fusion"},"Fusion entry below"))))),(0,r.kt)("h4",{id:"fusion"},"Fusion"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the other breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the other breakpoint was 
located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature describing the two genes and the transcripts that are fused along with")))),(0,r.kt)("h4",{id:"cancer-hotspots"},"Cancer Hotspots"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"residue"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant at the same amino acid position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numAltAminoAcidSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant with the same position and alternate amino acid position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"qValue"),(0,r.kt)("td",{parentName:"tr",align:"center"},"double"),(0,r.kt)("td",{parentName:"tr",align:"left"})))),(0,r.kt)("h3",{id:"regulatory-regions"},"Regulatory Regions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"regulatoryRegions":[\n {\n "id":"ENSR00001542175",\n "type":"promoter",\n "consequence":[\n "regulatory_region_variant"\n ]\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"type"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-types"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-consequences"},"possible values below"))))),(0,r.kt)("h4",{id:"regulatory-types"},"Regulatory Types"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CTCF_binding_site"),(0,r.kt)("li",{parentName:"ul"},"enhancer"),(0,r.kt)("li",{parentName:"ul"},"open_chromatin_region"),(0,r.kt)("li",{parentName:"ul"},"promoter"),(0,r.kt)("li",{parentName:"ul"},"promoter_flanking_region"),(0,r.kt)("li",{parentName:"ul"},"TF_binding_site")),(0,r.kt)("h4",{id:"regulatory-consequences"},"Regulatory Consequences"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"regulatory_region_variant"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_ablation"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_amplification"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_truncation")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)(i.default,{mdxType:"ClinVar"}),(0,r.kt)("h3",{id:"1000-genomes"},"1000 
Genomes"),(0,r.kt)(b.default,{mdxType:"ThousandGenomesSmall"}),(0,r.kt)("h3",{id:"dann"},"DANN"),(0,r.kt)(u.default,{mdxType:"DANN"}),(0,r.kt)("h3",{id:"dbsnp"},"dbSNP"),(0,r.kt)(m.default,{mdxType:"DbSNP"}),(0,r.kt)("h3",{id:"decipher"},"DECIPHER"),(0,r.kt)(w.default,{mdxType:"DECIPHER"}),(0,r.kt)("h3",{id:"gerp"},"GERP"),(0,r.kt)(g.default,{mdxType:"GERP"}),(0,r.kt)("h3",{id:"gme-variome"},"GME Variome"),(0,r.kt)(D.default,{mdxType:"GME"}),(0,r.kt)("h3",{id:"gnomad"},"gnomAD"),(0,r.kt)(y.default,{mdxType:"GnomadSmall"}),(0,r.kt)("h3",{id:"mitomap"},"MITOMAP"),(0,r.kt)(N.default,{mdxType:"MitoMapSmall"}),(0,r.kt)("h3",{id:"primate-ai"},"Primate AI"),(0,r.kt)(s.default,{mdxType:"PrimateAI"}),(0,r.kt)("h3",{id:"revel"},"REVEL"),(0,r.kt)(c.default,{mdxType:"REVEL"}),(0,r.kt)("h3",{id:"splice-ai"},"Splice AI"),(0,r.kt)(k.default,{mdxType:"SpliceAI"}),(0,r.kt)("h3",{id:"topmed"},"TOPMed"),(0,r.kt)(j.default,{mdxType:"TOPMed"}),(0,r.kt)("h2",{id:"genes"},"Genes"),(0,r.kt)("p",null,"Illumina Connected Annotations repots gene annotations for all genes that have an overlapping variant with the exception of flanking variants (i.e. variants that only cause upstream_gene_variant or downstream_gene_variant)."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"genes":[\n {\n "name":"MSH6",\n "hgncId":7329,\n "summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. 
Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",\n /* this is where gene-level data sources can be found e.g. OMIM */\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgncId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"summary"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"short description of the gene from ",(0,r.kt)("a",{parentName:"td",href:"https://www.omim.org/"},"OMIM"))))),(0,r.kt)("h3",{id:"omim"},"OMIM"),(0,r.kt)(A.default,{mdxType:"Omim"}),(0,r.kt)("h3",{id:"gnomad-lof-gene-metrics"},"gnomAD LoF Gene Metrics"),(0,r.kt)(h.default,{mdxType:"GnomadGeneLof"}),(0,r.kt)("h3",{id:"clingen-disease-validity"},"ClinGen Disease Validity"),(0,r.kt)(d.default,{mdxType:"ClinGenDiseaseValidity"}),(0,r.kt)("h3",{id:"cosmic-cancer-gene-census"},"COSMIC Cancer Gene Census"),(0,r.kt)(T.default,{mdxType:"COSMICCGC"}))}x.isMDXComponent=!0},66410:(t,e,a)=>{a.d(e,{Z:()=>n});const n=a.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/b2e466e8.56f0aa3f.js b/assets/js/b2e466e8.56f0aa3f.js new file mode 100644 index 00000000..4c688ce1 --- /dev/null +++ b/assets/js/b2e466e8.56f0aa3f.js @@ -0,0 +1 @@ +"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8577,5160,1912,4899,7942,2508,4648,8462,5702,6192,216,6635,3232,12,9082,4105,6602,1633,2630,829,7870,4246,3805,8633],{3905:(t,e,a)=>{a.d(e,{Zo:()=>m,kt:()=>g});var n=a(7294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),d=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},m=function(t){var e=d(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",c={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},u=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=o(t,["components","mdxType","originalType","parentName"]),s=d(a),u=r,g=s["".concat(p,".").concat(u)]||s[u]||c[u]||l;return a?n.createElement(g,i(i({ref:e},m),{},{components:a})):n.createElement(g,i({ref:e},m))}));function g(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=u;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[s]="string"==typeof t?t:r,i[1]=o;for(var d=2;d{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-snv-json",id:"data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | 
Notes |",source:"@site/docs/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-snv-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n "sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. Non-zero integer.")))))}s.isMDXComponent=!0},2590:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/1000Genomes-sv-json",id:"data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/1000Genomes-sv-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0},9729:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/amino-acid-conservation-json",id:"data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/amino-acid-conservation-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} \n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array of 
doubles"),(0,r.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}s.isMDXComponent=!0},7356:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-dosage-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 
1\n}]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,r.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,r.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}s.isMDXComponent=!0},949:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-gene-validity-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,r.kt)("td",{parentName:"tr",align:null},"object"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"disease"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"disease label")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classification"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see below for possible 
values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"classification")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no reported evidence"),(0,r.kt)("li",{parentName:"ul"},"disputed"),(0,r.kt)("li",{parentName:"ul"},"limited"),(0,r.kt)("li",{parentName:"ul"},"moderate"),(0,r.kt)("li",{parentName:"ul"},"definitive"),(0,r.kt)("li",{parentName:"ul"},"strong"),(0,r.kt)("li",{parentName:"ul"},"refuted"),(0,r.kt)("li",{parentName:"ul"},"no known disease relationship")))}s.isMDXComponent=!0},4674:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clingen-json",id:"data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n 
"end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. 
Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}s.isMDXComponent=!0},212:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/clinvar-json",id:"data-sources/clinvar-json",title:"clinvar-json",description:"small variants:",source:"@site/docs/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clinvar-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"small variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch 
syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"large variants:")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n "pathogenic"\n ], \n "lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n "significance":[\n "pathogenic"\n ],\n "lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"variant 
type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single 
variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}s.isMDXComponent=!0},1273:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/cosmic-cancer-gene-census",id:"data-sources/cosmic-cancer-gene-census",title:"cosmic-cancer-gene-census",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/cosmic-cancer-gene-census.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-cancer-gene-census",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-cancer-gene-census",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-cancer-gene-census.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},' {\n "name": "PRDM16",\n "hgncId": 14000,\n "ncbiGeneId": "63976",\n "ensemblGeneId": "ENSG00000142611",\n "cosmic": {\n "roleInCancer": [\n "oncogene",\n "fusion"\n ]\n }\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"roleInCancer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Possible roles in caner")))))}s.isMDXComponent=!0},540:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dann-json",id:"data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dann-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 0.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1.0")))))}s.isMDXComponent=!0},9156:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/dbsnp-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n 
"rs1042821"\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}s.isMDXComponent=!0},4072:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/decipher-json",id:"data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/decipher-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed deletions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"total # of 
samples")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap")))))}s.isMDXComponent=!0},5538:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gerp-json",id:"data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gerp-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 1.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to 
+\u221e")))))}s.isMDXComponent=!0},8036:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gme-json",id:"data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gme-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"GME allele 
frequency")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}s.isMDXComponent=!0},4859:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-lof-json",id:"data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-lof-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n "loeuf":1.13e0\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pLi"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 
0.1*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pNull"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"pRec"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"synZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"misZ"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))))}s.isMDXComponent=!0},3827:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-small-variants-json",id:"data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-small-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n "failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"coverage"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer 
values)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. 
Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. 
Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"finHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. 
Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. Non-negative integer")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. Non-negative integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}s.isMDXComponent=!0},818:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/gnomad-structural-variants-json",id:"data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": [\n {\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n "variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n "afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n }\n]\n\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"chromosome 
number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"structural variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"gnomAD ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,r.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,r.kt)("tr",{parentNa
me:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}s.isMDXComponent=!0},8181:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-small-variants-json",id:"data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-small-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"status"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MitoTIP 
score")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}s.isMDXComponent=!0},8898:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/mitomap-structural-variants-json",id:"data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/mitomap-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}s.isMDXComponent=!0},8010:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/omim-json",id:"data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/omim-json.md",tags:[],version:"current",frontMatter:{}},p=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}s.isMDXComponent=!0},737:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/primate-ai-json",id:"data-sources/primate-ai-json",title:"primate-ai-json",description:"GRCh38",source:"@site/docs/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/primate-ai-json.md",tags:[],version:"current",frontMatter:{}},p=[{value:"GRCh38",id:"grch38",children:[],level:4},{value:"GRCh37",id:"grch37",children:[],level:4}],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h4",{id:"grch38"},"GRCh38"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI-3D": [\n {\n "aminoAcidPosition": 2,\n "refAminoAcid": "V",\n "altAminoAcid": "M",\n "score": 0.616944,\n "scorePercentile": 0.52,\n "ensemblTranscriptId": "ENST00000335137.4",\n "refSeqTranscriptId": "NM_001005484.1"\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidPosition"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Amino Acid Position (1-based)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAminoAcid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Reference Amino Acid")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAminoAcid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Alternate Amino Acid")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ensemblTranscriptId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Transcript ID (Ensembl)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refSeqTranscriptId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Transcript ID 
(RefSeq)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))),(0,r.kt)("h4",{id:"grch37"},"GRCh37"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI": [\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC Gene Symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}s.isMDXComponent=!0},591:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/revel-json",id:"data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/revel-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}s.isMDXComponent=!0},9838:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/splice-ai-json",id:"data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/splice-ai-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function 
s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}s.isMDXComponent=!0},9819:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/topmed-json",id:"data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/topmed-json.md",tags:[],version:"current",frontMatter:{}},p=[],d={toc:p},m="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Illumina Connected Annotations)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"int"),(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"bool"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}s.isMDXComponent=!0},120:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>C,default:()=>x,frontMatter:()=>M,metadata:()=>S,toc:()=>R});var n=a(7462),r=(a(7294),a(3905)),l=a(9729),i=a(212),o=a(4674),p=a(7356),d=a(949),m=a(9156),s=a(737),c=a(591),u=a(540),g=a(5538),k=a(9838),N=a(8181),f=a(8898),y=a(3827),h=a(4859),b=a(1888),v=a(2590),A=a(8010),j=a(9819),I=a(818),D=a(8036),w=a(4072),T=a(1273);const M={title:"Illumina Connected Annotations JSON File Format"},C=void 
0,S={unversionedId:"file-formats/illumina-annotator-json-file-format",id:"file-formats/illumina-annotator-json-file-format",title:"Illumina Connected Annotations JSON File Format",description:"Overview",source:"@site/docs/file-formats/illumina-annotator-json-file-format.mdx",sourceDirName:"file-formats",slug:"/file-formats/illumina-annotator-json-file-format",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/file-formats/illumina-annotator-json-file-format.mdx",tags:[],version:"current",frontMatter:{title:"Illumina Connected Annotations JSON File Format"},sidebar:"docs",previous:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed"},next:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations"}},R=[{value:"Overview",id:"overview",children:[{value:"Conventions",id:"conventions",children:[],level:3},{value:"JSON Layout",id:"json-layout",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Header",id:"header",children:[{value:"Data Source",id:"data-source",children:[],level:4},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:4}],level:2},{value:"Positions",id:"positions",children:[{value:"ClinGen",id:"clingen",children:[],level:3},{value:"1000 Genomes (SV)",id:"1000-genomes-sv",children:[],level:3},{value:"gnomAD (SV)",id:"gnomad-sv",children:[],level:3},{value:"MITOMAP (SV)",id:"mitomap-sv",children:[],level:3}],level:2},{value:"Samples",id:"samples",children:[],level:2},{value:"Variants",id:"variants",children:[{value:"Transcripts",id:"transcripts",children:[{value:"PolyPhen",id:"polyphen",children:[],level:4},{value:"SIFT",id:"sift",children:[],level:4},{value:"Amino Acid Conservation",id:"amino-acid-conservation",children:[],level:4},{value:"Gene 
Fusions",id:"gene-fusions",children:[],level:4},{value:"Fusion",id:"fusion",children:[],level:4},{value:"Cancer Hotspots",id:"cancer-hotspots",children:[],level:4}],level:3},{value:"Regulatory Regions",id:"regulatory-regions",children:[{value:"Regulatory Types",id:"regulatory-types",children:[],level:4},{value:"Regulatory Consequences",id:"regulatory-consequences",children:[],level:4}],level:3},{value:"ClinVar",id:"clinvar",children:[],level:3},{value:"1000 Genomes",id:"1000-genomes",children:[],level:3},{value:"DANN",id:"dann",children:[],level:3},{value:"dbSNP",id:"dbsnp",children:[],level:3},{value:"DECIPHER",id:"decipher",children:[],level:3},{value:"GERP",id:"gerp",children:[],level:3},{value:"GME Variome",id:"gme-variome",children:[],level:3},{value:"gnomAD",id:"gnomad",children:[],level:3},{value:"MITOMAP",id:"mitomap",children:[],level:3},{value:"Primate AI",id:"primate-ai",children:[],level:3},{value:"REVEL",id:"revel",children:[],level:3},{value:"Splice AI",id:"splice-ai",children:[],level:3},{value:"TOPMed",id:"topmed",children:[],level:3}],level:2},{value:"Genes",id:"genes",children:[{value:"OMIM",id:"omim",children:[],level:3},{value:"gnomAD LoF Gene Metrics",id:"gnomad-lof-gene-metrics",children:[],level:3},{value:"ClinGen Disease Validity",id:"clingen-disease-validity",children:[],level:3},{value:"COSMIC Cancer Gene Census",id:"cosmic-cancer-gene-census",children:[],level:3}],level:2}],O={toc:R},F="wrapper";function x(t){let{components:e,...M}=t;return(0,r.kt)(F,(0,n.Z)({},O,M,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("h3",{id:"conventions"},"Conventions"),(0,r.kt)("p",null,"In the Illumina Connected Annotations JSON representation, we try to maximize the amount of useful information that is relayed in the output file. 
As such, we have several conventions that are useful to know about:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"With boolean key/value pairs, we only output the keys that have a true value. I.e. there's no reason to display ",(0,r.kt)("inlineCode",{parentName:"li"},'"isStructuralVariant":false')," a few million times when annotating a small variant VCF."),(0,r.kt)("li",{parentName:"ul"},"When transferring data from the VCF file to the JSON (e.g. for allele depths (AD)), it is common to use a period (.) as a placeholder for missing data in the VCF file. Illumina Connected Annotations treats periods like empty or null strings and therefore will not output those entries.")),(0,r.kt)("h3",{id:"json-layout"},"JSON Layout"),(0,r.kt)("p",null,(0,r.kt)("img",{src:a(6837).Z})),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"In general, each position corresponds to a row in the original VCF file."),(0,r.kt)("p",{parentName:"div"},"For each gene that was referenced in the transcripts found in the positions section, there will be additional gene-level annotation in the gene section."))),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"We've put together a ",(0,r.kt)("a",{parentName:"p",href:"../introduction/parsing-json"},"new section that discusses how to parse our JSON files")," easily using examples in a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/blob/master/static/files/parse-json-python.ipynb"},"Python Jupyter notebook")," and a ",(0,r.kt)("a",{parentName:"p",href:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/blob/master/static/files/parse-json-r.ipynb"},"R version")," as well. 
In addition, we have information about how to quickly dump content from our JSON file using a tabix-like utility called JASIX."))),(0,r.kt)("h2",{id:"header"},"Header"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"IlluminaConnectedAnnotations 3.0.0-alpha.5+g6c52e247",\n "creationTime":"2017-06-14 15:53:13",\n "genomeAssembly":"GRCh37",\n "dataSources":[\n {\n "name":"OMIM",\n "version":"unknown",\n "description":"An Online Catalog of Human Genes and Genetic Disorders",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"VEP",\n "version":"84",\n "description":"BothRefSeqAndEnsembl",\n "releaseDate":"2017-01-16"\n },\n {\n "name":"ClinVar",\n "version":"20170503",\n "description":"A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence",\n "releaseDate":"2017-05-03"\n },\n {\n "name":"phyloP",\n "version":"hg19",\n "description":"46 way conservation score between humans and 45 other vertebrates",\n "releaseDate":"2009-11-10"\n }\n ],\n "samples":[\n "NA12878",\n "NA12891",\n "NA12892"\n ]\n },\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"annotator"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the name of the annotator and the current version")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"creationTime"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd 
hh:mm:ss")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genomeAssembly"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#genome-assemblies"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"schemaVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"left"},"incremented whenever the core structure of the JSON file introduces breaking changes")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataVersion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"dataSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#data-source"},"Data Source entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"samples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"the order of these sample names will be used throughout the JSON file when enumerating samples")))),(0,r.kt)("h4",{id:"data-source"},"Data 
Source"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"version"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"optional description of the data source")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"releaseDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")))),(0,r.kt)("h4",{id:"genome-assemblies"},"Genome Assemblies"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"GRCh37"),(0,r.kt)("li",{parentName:"ul"},"GRCh38"),(0,r.kt)("li",{parentName:"ul"},"hg19"),(0,r.kt)("li",{parentName:"ul"},"SARSCoV2")),(0,r.kt)("h2",{id:"positions"},"Positions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"positions":[\n {\n "chromosome":"chr2",\n "position":48010488,\n "repeatUnit":"GGCCCC",\n "refRepeatCount":3,\n "svEnd":48020488,\n "refAllele":"G",\n "altAlleles":[\n "A",\n "GT"\n ],\n "quality":461,\n "filters":[\n "PASS"\n ],\n "ciPos":[\n -170,\n 170\n ],\n "ciEnd":[\n -175,\n 175\n ],\n "svLength":1000,\n "strandBias":1.23,\n "jointSomaticNormalQuality":29,\n 
"cytogeneticBand":"2p16.3",\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Variant Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"position"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (1-based notation). Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnit"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refRepeatCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"STR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by 
ExpansionHunter")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"quality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf (Normally an integer, but some variant callers using floating point. 
Has been observed as high as 500k)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"filters"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exactly as displayed in the vcf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ciEnd"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"svLength"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"strandBias"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"small variant"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by GATK (from SB)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"jointSomaticNormalQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SV"),(0,r.kt)("td",{parentName:"tr",align:"left"},"provided by the Manta variant caller (SOMATICSCORE)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cytogeneticBand"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"all"),(0,r.kt)("td",{parentName:"tr",align:"left"},"e.g. 
17p13.1")))),(0,r.kt)("h3",{id:"clingen"},"ClinGen"),(0,r.kt)(o.default,{mdxType:"ClinGen"}),(0,r.kt)(p.default,{mdxType:"ClinGenDosage"}),(0,r.kt)("h3",{id:"1000-genomes-sv"},"1000 Genomes (SV)"),(0,r.kt)(v.default,{mdxType:"ThousandGenomesSV"}),(0,r.kt)("h3",{id:"gnomad-sv"},"gnomAD (SV)"),(0,r.kt)(I.default,{mdxType:"GnomadSV"}),(0,r.kt)("h3",{id:"mitomap-sv"},"MITOMAP (SV)"),(0,r.kt)(f.default,{mdxType:"MitoMapSV"}),(0,r.kt)("h2",{id:"samples"},"Samples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n "totalDepth":57,\n "genotypeQuality":12,\n "copyNumber":3,\n "repeatUnitCounts":[\n 10,\n 20\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "failedFilter":true,\n "splitReadCounts":[\n 10,\n 20\n ],\n "pairedEndReadCounts":[\n 10,\n 20\n ],\n "isDeNovo":true,\n "diseaseAffectedStatuses":[\n "-"\n ],\n "artifactAdjustedQualityScore":89.3,\n "likelihoodRatioQualityScore":78.2,\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"center"},"VCF"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotype"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GT"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantFrequencies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"VF, AD"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"totalDepth"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DP"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genotypeQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values. Typically maxes out at 99")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"copyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"minorHaplotypeCopyNumber"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"MCN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"repeatUnitCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"REPCN"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleDepths"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AD"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer 
values")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"FT"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"splitReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pairedEndReadCounts"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"PR"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Manta-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDeNovo"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DN"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"deNovoQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DQ"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"diseaseAffectedStatuses"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"center"},"DST"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ExpansionHunter-specific")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"artifactAdjustedQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. 
Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"likelihoodRatioQualityScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"LQ"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PEPE-specific. Range: 0 - 100.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lossOfHeterozygosity"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CN, MCN"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"somaticQuality"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"SQ"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"center"},"VF"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 100. 2 decimal places. 
One value per alternate allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"binCount"),(0,r.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,r.kt)("td",{parentName:"tr",align:"center"},"BC"),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-negative integer values")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Empty Samples")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"If a sample does not contain any entries, we will create a sample object that contains the ",(0,r.kt)("inlineCode",{parentName:"p"},"isEmpty")," key. 
This ensures that sample ordering is preserved while indicating that a sample is intentionally empty."),(0,r.kt)("pre",{parentName:"div"},(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"samples":[\n {\n "isEmpty":true\n }\n],\n')))),(0,r.kt)("h2",{id:"variants"},"Variants"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "isReferenceMinorAllele":true,\n "isStructuralVariant":true,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "isRecomposedVariant":true,\n "linkedVids":["2:48010488:GTA:ATC"],\n "hgvsg":"NC_000002.11:g.48010488G>A",\n "phylopScore":0.459\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"vid"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"Variant Identifiers"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. 
Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"end"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"1-based non-negative integer values. Range: 1 - 250 million")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReferenceMinorAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a reference minor allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isStructuralVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a structural variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inLowComplexityRegion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant lies in a low complexity region (gnomAD low complexity regions)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the reference allele")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"parsimonious representation of the alternate allele.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"uses\xa0",(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/current_svn/term/SO:0001059"},"Sequence Ontology sequence 
alterations"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isDecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the decomposed variant has been used to create another recomposed variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isRecomposedVariant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is recomposed from two or more decomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"linkedVids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"list of ",(0,r.kt)("a",{parentName:"td",href:"../core-functionality/variant-ids"},"VIDs")," for variants connecting decomposed and recomposed variants")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsg"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS g. notation")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phyloP conservation score. 
Range: -14.08 to 6.424")))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Reference Minor Alleles")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Illumina Connected Annotations supports annotating reference minor alleles. In such a case, ",(0,r.kt)("inlineCode",{parentName:"p"},"refAllele")," will be replaced by the global major allele and ",(0,r.kt)("inlineCode",{parentName:"p"},"altAllele")," will be replaced with the original reference allele."))),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Flagging Decomposed & Recomposed Variants")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When two or more decomposed variants are recomposed into an MNV, the decomposed variants will be marked with 
",(0,r.kt)("inlineCode",{parentName:"p"},'"isDecomposedVariant":true'),"."),(0,r.kt)("p",{parentName:"div"},"Similarly, the recomposed variant will be shown as a new VCF position. This recomposed variant will be flagged with ",(0,r.kt)("inlineCode",{parentName:"p"},'"isRecomposedVariant":true'),"."))),(0,r.kt)("h3",{id:"transcripts"},"Transcripts"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"transcripts":[\n {\n "transcript":"ENST00000445503.1",\n "source":"Ensembl",\n "bioType":"nonsense_mediated_decay",\n "codons":"gGg/gAg",\n "aminoAcids":"G/E",\n "cdnaPos":"268",\n "cdsPos":"116",\n "exons":"1/9",\n "introns":"1/8",\n "proteinPos":"39",\n "geneId":"ENSG00000116062",\n "hgnc":"MSH6",\n "consequence":[\n "missense_variant",\n "NMD_transcript_variant"\n ],\n "hgvsc":"ENST00000445503.1:c.116G>A",\n "hgvsp":"ENSP00000405294.1:p.(Gly39Glu)",\n "geneFusion":{\n "exon":6,\n "intron":5,\n "fusions":[\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000437180.1}:c.58+568_1443",\n "exon":3,\n "intron":2\n },\n {\n "hgvsc":"ETV6{ENST00000396373.4}:c.1_1009+3402_RUNX1{ENST00000300305.3}:c.58+568_1443",\n "exon":2,\n "intron":1\n }\n ]\n },\n "isCanonical":true,\n "polyPhenScore":0.95,\n "polyPhenPrediction":"probably damaging",\n "proteinId":"ENSP00000405294.1",\n "siftScore":0.61,\n "siftPrediction":"tolerated",\n "completeOverlap":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"transcript ID. e.g. 
ENST00000445503.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"source"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"RefSeq / Ensembl")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,r.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"codons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcids"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdnaPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cdsPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exons"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"exons affected by the variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"introns"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"introns affected by the 
variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinPos"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.sequenceontology.org/browser/obob.cgi"},"Sequence Ontology Consequences"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsp"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS protein nomenclature")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneFusion"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#gene-fusions"},"Gene Fusions entry below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isCanonical"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a canonical 
transcript")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isManeSelect"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this is a MANE select transcript")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"polyPhenPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#polyphen"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"proteinId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"protein ID. E.g. ENSP00000405294.1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"siftPrediction"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#sift"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"completeOverlap"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this transcript is completely overlapped by the variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"cancerHotspots"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see 
",(0,r.kt)("a",{parentName:"td",href:"#cancer-hotspots"},"Cancer Hotspots entry below"))))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"MANE Select")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"MANE select tags are only available for RefSeq transcripts on GRCh38."))),(0,r.kt)("h4",{id:"polyphen"},"PolyPhen"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"probably damaging"),(0,r.kt)("li",{parentName:"ul"},"possibly damaging"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"unknown")),(0,r.kt)("h4",{id:"sift"},"SIFT"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"tolerated"),(0,r.kt)("li",{parentName:"ul"},"deleterious"),(0,r.kt)("li",{parentName:"ul"},"tolerated - low confidence"),(0,r.kt)("li",{parentName:"ul"},"deleterious - low confidence")),(0,r.kt)("h4",{id:"amino-acid-conservation"},"Amino Acid Conservation"),(0,r.kt)(l.default,{mdxType:"AminoAcidConservation"}),(0,r.kt)("h4",{id:"gene-fusions"},"Gene 
Fusions"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"fusions"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#fusion"},"Fusion entry below"))))),(0,r.kt)("h4",{id:"fusion"},"Fusion"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual exon where the other breakpoint was located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"actual intron where the other breakpoint was 
located")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgvsc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGVS coding nomenclature describing the two genes and the transcripts that are fused along with")))),(0,r.kt)("h4",{id:"cancer-hotspots"},"Cancer Hotspots"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"residue"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant at the same amino acid position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numAltAminoAcidSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"how many samples are associated with a variant with the same position and alternate amino acid position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"qValue"),(0,r.kt)("td",{parentName:"tr",align:"center"},"double"),(0,r.kt)("td",{parentName:"tr",align:"left"})))),(0,r.kt)("h3",{id:"regulatory-regions"},"Regulatory Regions"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"regulatoryRegions":[\n {\n "id":"ENSR00001542175",\n "type":"promoter",\n "consequence":[\n "regulatory_region_variant"\n ]\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"type"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-types"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"consequence"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"see ",(0,r.kt)("a",{parentName:"td",href:"#regulatory-consequences"},"possible values below"))))),(0,r.kt)("h4",{id:"regulatory-types"},"Regulatory Types"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"CTCF_binding_site"),(0,r.kt)("li",{parentName:"ul"},"enhancer"),(0,r.kt)("li",{parentName:"ul"},"open_chromatin_region"),(0,r.kt)("li",{parentName:"ul"},"promoter"),(0,r.kt)("li",{parentName:"ul"},"promoter_flanking_region"),(0,r.kt)("li",{parentName:"ul"},"TF_binding_site")),(0,r.kt)("h4",{id:"regulatory-consequences"},"Regulatory Consequences"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"regulatory_region_variant"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_ablation"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_amplification"),(0,r.kt)("li",{parentName:"ul"},"regulatory_region_truncation")),(0,r.kt)("h3",{id:"clinvar"},"ClinVar"),(0,r.kt)(i.default,{mdxType:"ClinVar"}),(0,r.kt)("h3",{id:"1000-genomes"},"1000 
Genomes"),(0,r.kt)(b.default,{mdxType:"ThousandGenomesSmall"}),(0,r.kt)("h3",{id:"dann"},"DANN"),(0,r.kt)(u.default,{mdxType:"DANN"}),(0,r.kt)("h3",{id:"dbsnp"},"dbSNP"),(0,r.kt)(m.default,{mdxType:"DbSNP"}),(0,r.kt)("h3",{id:"decipher"},"DECIPHER"),(0,r.kt)(w.default,{mdxType:"DECIPHER"}),(0,r.kt)("h3",{id:"gerp"},"GERP"),(0,r.kt)(g.default,{mdxType:"GERP"}),(0,r.kt)("h3",{id:"gme-variome"},"GME Variome"),(0,r.kt)(D.default,{mdxType:"GME"}),(0,r.kt)("h3",{id:"gnomad"},"gnomAD"),(0,r.kt)(y.default,{mdxType:"GnomadSmall"}),(0,r.kt)("h3",{id:"mitomap"},"MITOMAP"),(0,r.kt)(N.default,{mdxType:"MitoMapSmall"}),(0,r.kt)("h3",{id:"primate-ai"},"Primate AI"),(0,r.kt)(s.default,{mdxType:"PrimateAI"}),(0,r.kt)("h3",{id:"revel"},"REVEL"),(0,r.kt)(c.default,{mdxType:"REVEL"}),(0,r.kt)("h3",{id:"splice-ai"},"Splice AI"),(0,r.kt)(k.default,{mdxType:"SpliceAI"}),(0,r.kt)("h3",{id:"topmed"},"TOPMed"),(0,r.kt)(j.default,{mdxType:"TOPMed"}),(0,r.kt)("h2",{id:"genes"},"Genes"),(0,r.kt)("p",null,"Illumina Connected Annotations repots gene annotations for all genes that have an overlapping variant with the exception of flanking variants (i.e. variants that only cause upstream_gene_variant or downstream_gene_variant)."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"genes":[\n {\n "name":"MSH6",\n "hgncId":7329,\n "summary":"This gene encodes a member of the DNA mismatch repair MutS family. In E. coli, the MutS protein helps in the recognition of mismatched nucleotides prior to their repair. A highly conserved region of approximately 150 aa, called the Walker-A adenine nucleotide binding motif, exists in MutS homologs. The encoded protein heterodimerizes with MSH2 to form a mismatch recognition complex that functions as a bidirectional molecular switch that exchanges ADP and ATP as DNA mismatches are bound and dissociated. Mutations in this gene may be associated with hereditary nonpolyposis colon cancer, colorectal cancer, and endometrial cancer. 
Transcripts variants encoding different isoforms have been described. [provided by RefSeq, Jul 2013]",\n /* this is where gene-level data sources can be found e.g. OMIM */\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgncId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"summary"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"short description of the gene from ",(0,r.kt)("a",{parentName:"td",href:"https://www.omim.org/"},"OMIM"))))),(0,r.kt)("h3",{id:"omim"},"OMIM"),(0,r.kt)(A.default,{mdxType:"Omim"}),(0,r.kt)("h3",{id:"gnomad-lof-gene-metrics"},"gnomAD LoF Gene Metrics"),(0,r.kt)(h.default,{mdxType:"GnomadGeneLof"}),(0,r.kt)("h3",{id:"clingen-disease-validity"},"ClinGen Disease Validity"),(0,r.kt)(d.default,{mdxType:"ClinGenDiseaseValidity"}),(0,r.kt)("h3",{id:"cosmic-cancer-gene-census"},"COSMIC Cancer Gene Census"),(0,r.kt)(T.default,{mdxType:"COSMICCGC"}))}x.isMDXComponent=!0},6837:(t,e,a)=>{a.d(e,{Z:()=>n});const n=a.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/b4210c11.b6826069.js b/assets/js/b4210c11.b6826069.js new file mode 100644 index 00000000..5ce580ce --- /dev/null +++ b/assets/js/b4210c11.b6826069.js @@ -0,0 +1 @@ +"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7870],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>g});var a=n(7294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},s=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,c=i(t,["components","mdxType","originalType","parentName"]),d=u(n),s=r,g=d["".concat(p,".").concat(s)]||d[s]||m[s]||l;return n?a.createElement(g,o(o({ref:e},c),{},{components:n})):a.createElement(g,o({ref:e},c))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[d]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(7462),r=(n(7294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/clingen-json",id:"data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-json.md",tags:[],version:"current",frontMatter:{}},p=[],u={toc:p},c="wrapper";function d(t){let{components:e,...n}=t;return(0,r.kt)(c,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome 
names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). 
Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/b4210c11.d34e6f22.js b/assets/js/b4210c11.d34e6f22.js deleted file mode 100644 index a1c63196..00000000 --- a/assets/js/b4210c11.d34e6f22.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7870],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},s=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,c=i(t,["components","mdxType","originalType","parentName"]),d=u(n),s=r,g=d["".concat(p,".").concat(s)]||d[s]||m[s]||l;return n?a.createElement(g,o(o({ref:e},c),{},{components:n})):a.createElement(g,o({ref:e},c))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[d]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/clingen-json",id:"data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clingen-json.md",tags:[],version:"current",frontMatter:{}},p=[],u={toc:p},c="wrapper";function d(t){let{components:e,...n}=t;return(0,r.kt)(c,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome 
names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). 
Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/b4d8ffc9.0fc5619e.js b/assets/js/b4d8ffc9.0fc5619e.js deleted file mode 100644 index dff90385..00000000 --- a/assets/js/b4d8ffc9.0fc5619e.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4091],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>g});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),u=function(t){var e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=u(t.components);return r.createElement(p.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},s=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,l=t.originalType,p=t.parentName,c=i(t,["components","mdxType","originalType","parentName"]),d=u(n),s=a,g=d["".concat(p,".").concat(s)]||d[s]||m[s]||l;return n?r.createElement(g,o(o({ref:e},c),{},{components:n})):r.createElement(g,o({ref:e},c))}));function g(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[d]="string"==typeof t?t:a,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/decipher-json",id:"version-3.21/data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/decipher-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],u={toc:p},c="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n }\n],\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"begin"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"end"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"# of observed deletions")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"total # of samples")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 
0.57 would indicate a 57% annotation overlap")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/b51ccab7.1acea382.js b/assets/js/b51ccab7.1acea382.js new file mode 100644 index 00000000..b007e6d6 --- /dev/null +++ b/assets/js/b51ccab7.1acea382.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[611,2508],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>v});var a=n(7294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),p=c(n),u=r,v=p["".concat(s,".").concat(u)]||p[u]||m[u]||i;return n?a.createElement(v,o(o({ref:t},d),{},{components:n})):a.createElement(v,o({ref:t},d))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,o[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var 
a=n(7462),r=(n(7294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/revel-json",id:"data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/revel-json.md",tags:[],version:"current",frontMatter:{}},s=[],c={toc:s},d="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}p.isMDXComponent=!0},1562:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>s,toc:()=>c});var a=n(7462),r=(n(7294),n(3905)),i=n(591);const o={title:"REVEL"},l=void 0,s={unversionedId:"data-sources/revel",id:"data-sources/revel",title:"REVEL",description:"Overview",source:"@site/docs/data-sources/revel.mdx",sourceDirName:"data-sources",slug:"/data-sources/revel",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/revel.mdx",tags:[],version:"current",frontMatter:{title:"REVEL"},sidebar:"docs",previous:{title:"Primate 
AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai"},next:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"CSV File",id:"csv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:c},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. ",(0,r.kt)("em",{parentName:"p"},"The American Journal of Human Genetics")," ",(0,r.kt)("strong",{parentName:"p"},"99"),", 877-885 (2016). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1016/j.ajhg.2016.08.016"},"https://doi.org/10.1016/j.ajhg.2016.08.016")))),(0,r.kt)("h2",{id:"csv-file"},"CSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL\n1,35142,35142,G,A,T,M,0.027\n1,35142,35142,G,C,T,R,0.035\n1,35142,35142,G,T,T,K,0.043\n1,35143,35143,T,A,T,S,0.018\n1,35143,35143,T,C,T,A,0.034\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"hg19_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"grch38_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"REVEL"))),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Since the input file contains positions 
for both GRCh37 and GRCh38, we split it into two ",(0,r.kt)("strong",{parentName:"p"},"TSV")," files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file."))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Conflicting Scores")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When there are multiple scores available for the same variant (i.e. 
the same position with the same alternative allele), we pick the highest score."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sites.google.com/site/revelgenomics/downloads"},"https://sites.google.com/site/revelgenomics/downloads")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/b51ccab7.33697843.js b/assets/js/b51ccab7.33697843.js deleted file mode 100644 index f954ae8d..00000000 --- a/assets/js/b51ccab7.33697843.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[611,2508],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),p=c(n),u=r,v=p["".concat(s,".").concat(u)]||p[u]||m[u]||i;return 
n?a.createElement(v,o(o({ref:t},d),{},{components:n})):a.createElement(v,o({ref:t},d))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:r,o[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/revel-json",id:"data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/revel-json.md",tags:[],version:"current",frontMatter:{}},s=[],c={toc:s},d="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"score"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}p.isMDXComponent=!0},41562:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>s,toc:()=>c});var a=n(87462),r=(n(67294),n(3905)),i=n(60591);const o={title:"REVEL"},l=void 
0,s={unversionedId:"data-sources/revel",id:"data-sources/revel",title:"REVEL",description:"Overview",source:"@site/docs/data-sources/revel.mdx",sourceDirName:"data-sources",slug:"/data-sources/revel",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/revel.mdx",tags:[],version:"current",frontMatter:{title:"REVEL"},sidebar:"docs",previous:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai"},next:{title:"Splice AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"CSV File",id:"csv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:c},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"REVEL is an ensemble method for predicting the pathogenicity of missense variants based on a combination of scores from 13 individual tools: MutPred, FATHMM v2.3, VEST 3.0, PolyPhen-2, SIFT, PROVEAN, MutationAssessor, MutationTaster, LRT, GERP++, SiPhy, phyloP, and phastCons."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 
0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Ioannidis, N. M. et al. REVEL: An Ensemble Method for Predicting the Pathogenicity of Rare Missense Variants. ",(0,r.kt)("em",{parentName:"p"},"The American Journal of Human Genetics")," ",(0,r.kt)("strong",{parentName:"p"},"99"),", 877-885 (2016). ",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1016/j.ajhg.2016.08.016"},"https://doi.org/10.1016/j.ajhg.2016.08.016")))),(0,r.kt)("h2",{id:"csv-file"},"CSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL\n1,35142,35142,G,A,T,M,0.027\n1,35142,35142,G,C,T,R,0.035\n1,35142,35142,G,T,T,K,0.043\n1,35143,35143,T,A,T,S,0.018\n1,35143,35143,T,C,T,A,0.034\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"hg19_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"grch38_pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"REVEL"))),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Since the input file contains positions for both GRCh37 and GRCh38, we split it into two ",(0,r.kt)("strong",{parentName:"p"},"TSV")," files (for the sake of better readability) with identical format. The positions for GRCh37 were sorted but not for GRCh38. So we re-sort the variants by position in the GRCh38 file."))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Conflicting Scores")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"When there are multiple scores available for the same variant (i.e. 
the same position with the same alternative allele), we pick the highest score."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sites.google.com/site/revelgenomics/downloads"},"https://sites.google.com/site/revelgenomics/downloads")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/b5251121.741e7f06.js b/assets/js/b5251121.741e7f06.js deleted file mode 100644 index f984fe35..00000000 --- a/assets/js/b5251121.741e7f06.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1779],{3905:(e,n,t)=>{t.d(n,{Zo:()=>u,kt:()=>f});var r=t(67294);function a(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);n&&(r=r.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,r)}return t}function c(e){for(var n=1;n=0||(a[t]=e[t]);return a}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(a[t]=e[t])}return a}var s=r.createContext({}),l=function(e){var n=r.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):c(c({},n),e)),t},u=function(e){var n=l(e.components);return r.createElement(s.Provider,{value:n},e.children)},p="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return r.createElement(r.Fragment,{},n)}},d=r.forwardRef((function(e,n){var t=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,u=i(e,["components","mdxType","originalType","parentName"]),p=l(t),d=a,f=p["".concat(s,".").concat(d)]||p[d]||m[d]||o;return t?r.createElement(f,c(c({ref:n},u),{},{components:t})):r.createElement(f,c({ref:n},u))}));function 
f(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var o=t.length,c=new Array(o);c[0]=d;var i={};for(var s in n)hasOwnProperty.call(n,s)&&(i[s]=n[s]);i.originalType=e,i[p]="string"==typeof e?e:a,c[1]=i;for(var l=2;l{t.r(n),t.d(n,{contentTitle:()=>c,default:()=>p,frontMatter:()=>o,metadata:()=>i,toc:()=>s});var r=t(87462),a=(t(67294),t(3905));const o={},c=void 0,i={unversionedId:"data-sources/cosmic-cancer-gene-census",id:"version-3.21/data-sources/cosmic-cancer-gene-census",title:"cosmic-cancer-gene-census",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/cosmic-cancer-gene-census.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-cancer-gene-census",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cosmic-cancer-gene-census",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/cosmic-cancer-gene-census.md",tags:[],version:"3.21",frontMatter:{}},s=[],l={toc:s},u="wrapper";function p(e){let{components:n,...t}=e;return(0,a.kt)(u,(0,r.Z)({},l,t,{components:n,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' {\n "name": "PRDM16",\n "hgncId": 14000,\n "ncbiGeneId": "63976",\n "ensemblGeneId": "ENSG00000142611",\n "cosmic": {\n "roleInCancer": [\n "oncogene",\n "fusion"\n ]\n }\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"roleInCancer"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Possible roles in caner")))))}p.isMDXComponent=!0}}]); \ No newline at end 
of file diff --git a/assets/js/b6dcd8b7.1b14dc15.js b/assets/js/b6dcd8b7.1b14dc15.js new file mode 100644 index 00000000..87ff2c2f --- /dev/null +++ b/assets/js/b6dcd8b7.1b14dc15.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6458],{3905:(t,e,n)=>{n.d(e,{Zo:()=>p,kt:()=>f});var a=n(7294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function l(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var m=a.createContext({}),c=function(t){var e=a.useContext(m),n=e;return t&&(n="function"==typeof t?t(e):l(l({},e),t)),n},p=function(t){var e=c(t.components);return a.createElement(m.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,i=t.originalType,m=t.parentName,p=o(t,["components","mdxType","originalType","parentName"]),s=c(n),u=r,f=s["".concat(m,".").concat(u)]||s[u]||d[u]||i;return n?a.createElement(f,l(l({ref:e},p),{},{components:n})):a.createElement(f,l({ref:e},p))}));function f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var i=n.length,l=new Array(i);l[0]=u;var o={};for(var m in e)hasOwnProperty.call(e,m)&&(o[m]=e[m]);o.originalType=t,o[s]="string"==typeof t?t:r,l[1]=o;for(var c=2;c{n.r(e),n.d(e,{contentTitle:()=>l,default:()=>s,frontMatter:()=>i,metadata:()=>o,toc:()=>m});var a=n(7462),r=(n(7294),n(3905));const i={},l=void 
0,o={unversionedId:"data-sources/cosmic-json",id:"data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-json.md",tags:[],version:"current",frontMatter:{}},m=[],c={toc:m},p="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'{\n "id":"COSV58272668",\n "numSamples":8,\n "refAllele":"-",\n "altAllele":"CCT",\n "histologies":[\n {\n "name":"carcinoma (serous carcinoma)",\n "numSamples":2\n },\n {\n "name":"meningioma (fibroblastic)",\n "numSamples":1\n },\n {\n "name":"carcinoma",\n "numSamples":1\n },\n {\n "name":"carcinoma (squamous cell carcinoma)",\n "numSamples":1\n },\n {\n "name":"meningioma (transitional)",\n "numSamples":1\n },\n {\n "name":"carcinoma (adenocarcinoma)",\n "numSamples":1\n },\n {\n "name":"other (neoplasm)",\n "numSamples":1\n }\n ],\n "sites":[\n {\n "name":"ovary",\n "numSamples":2\n },\n {\n "name":"meninges",\n "numSamples":2\n },\n {\n "name":"thyroid",\n "numSamples":2\n },\n {\n "name":"cervix",\n "numSamples":1\n },\n {\n "name":"large intestine (colon)",\n "numSamples":1\n }\n ],\n "pubMedIds":[\n 25738363,\n 27548314\n ],\n "confirmedSomatic":true,\n "drugResistance":true, /* not in this particular COSMIC variant */\n 
"isAlleleSpecific":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"COSMIC Genomic Mutation ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,r.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"confirmedSomatic"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true 
when the variant is a confirmed somatic variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"drugResistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant has been associated with drug resistance")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Count")),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"description")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/b6dcd8b7.f1969583.js b/assets/js/b6dcd8b7.f1969583.js deleted file mode 100644 index ce94cb47..00000000 --- a/assets/js/b6dcd8b7.f1969583.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6458],{3905:(t,e,n)=>{n.d(e,{Zo:()=>p,kt:()=>f});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function l(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var 
i=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var m=a.createContext({}),c=function(t){var e=a.useContext(m),n=e;return t&&(n="function"==typeof t?t(e):l(l({},e),t)),n},p=function(t){var e=c(t.components);return a.createElement(m.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,i=t.originalType,m=t.parentName,p=o(t,["components","mdxType","originalType","parentName"]),s=c(n),u=r,f=s["".concat(m,".").concat(u)]||s[u]||d[u]||i;return n?a.createElement(f,l(l({ref:e},p),{},{components:n})):a.createElement(f,l({ref:e},p))}));function f(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var i=n.length,l=new Array(i);l[0]=u;var o={};for(var m in e)hasOwnProperty.call(e,m)&&(o[m]=e[m]);o.originalType=t,o[s]="string"==typeof t?t:r,l[1]=o;for(var c=2;c{n.r(e),n.d(e,{contentTitle:()=>l,default:()=>s,frontMatter:()=>i,metadata:()=>o,toc:()=>m});var a=n(87462),r=(n(67294),n(3905));const i={},l=void 0,o={unversionedId:"data-sources/cosmic-json",id:"data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/cosmic-json.md",tags:[],version:"current",frontMatter:{}},m=[],c={toc:m},p="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'{\n "id":"COSV58272668",\n "numSamples":8,\n "refAllele":"-",\n "altAllele":"CCT",\n "histologies":[\n {\n "name":"carcinoma (serous carcinoma)",\n 
"numSamples":2\n },\n {\n "name":"meningioma (fibroblastic)",\n "numSamples":1\n },\n {\n "name":"carcinoma",\n "numSamples":1\n },\n {\n "name":"carcinoma (squamous cell carcinoma)",\n "numSamples":1\n },\n {\n "name":"meningioma (transitional)",\n "numSamples":1\n },\n {\n "name":"carcinoma (adenocarcinoma)",\n "numSamples":1\n },\n {\n "name":"other (neoplasm)",\n "numSamples":1\n }\n ],\n "sites":[\n {\n "name":"ovary",\n "numSamples":2\n },\n {\n "name":"meninges",\n "numSamples":2\n },\n {\n "name":"thyroid",\n "numSamples":2\n },\n {\n "name":"cervix",\n "numSamples":1\n },\n {\n "name":"large intestine (colon)",\n "numSamples":1\n }\n ],\n "pubMedIds":[\n 25738363,\n 27548314\n ],\n "confirmedSomatic":true,\n "drugResistance":true, /* not in this particular COSMIC variant */\n "isAlleleSpecific":true\n}\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"COSMIC Genomic Mutation 
ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,r.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,r.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"confirmedSomatic"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant is a confirmed somatic variant")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"drugResistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the variant has been associated with drug 
resistance")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Count")),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"name"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"description")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/b6e8fda9.5d207f53.js b/assets/js/b6e8fda9.5d207f53.js deleted file mode 100644 index 05e365f4..00000000 --- a/assets/js/b6e8fda9.5d207f53.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9917],{3905:(t,e,a)=>{a.d(e,{Zo:()=>p,kt:()=>g});var n=a(67294);function l(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function r(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(l[a]=t[a]);return l}(t,e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(l[a]=t[a])}return l}var s=n.createContext({}),m=function(t){var e=n.useContext(s),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},p=function(t){var e=m(t.components);return 
n.createElement(s.Provider,{value:e},t.children)},d="mdxType",k={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,l=t.mdxType,r=t.originalType,s=t.parentName,p=o(t,["components","mdxType","originalType","parentName"]),d=m(a),N=l,g=d["".concat(s,".").concat(N)]||d[N]||k[N]||r;return a?n.createElement(g,i(i({ref:e},p),{},{components:a})):n.createElement(g,i({ref:e},p))}));function g(t,e){var a=arguments,l=e&&e.mdxType;if("string"==typeof t||l){var r=a.length,i=new Array(r);i[0]=N;var o={};for(var s in e)hasOwnProperty.call(e,s)&&(o[s]=e[s]);o.originalType=t,o[d]="string"==typeof t?t:l,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var n=a(87462),l=(a(67294),a(3905));const r={title:"Custom Annotations"},i=void 0,o={unversionedId:"file-formats/custom-annotations",id:"version-3.21/file-formats/custom-annotations",title:"Custom Annotations",description:"Overview",source:"@site/versioned_docs/version-3.21/file-formats/custom-annotations.md",sourceDirName:"file-formats",slug:"/file-formats/custom-annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/file-formats/custom-annotations",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/file-formats/custom-annotations.md",tags:[],version:"3.21",frontMatter:{title:"Custom Annotations"},sidebar:"docs",previous:{title:"Nirvana JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/file-formats/nirvana-json-file-format"},next:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/core-functionality/canonical-transcripts"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Variant File Format",id:"variant-file-format",children:[{value:"Basic Allele Frequency 
Example",id:"basic-allele-frequency-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv",children:[],level:4},{value:"Convert to Nirvana Format",id:"convert-to-nirvana-format",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results",children:[],level:4}],level:3},{value:"Categories & Descriptions Example",id:"categories--descriptions-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-1",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-1",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-1",children:[],level:4},{value:"Using Positional Matches",id:"using-positional-matches",children:[],level:4}],level:3},{value:"Genomic Region Example",id:"genomic-region-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-2",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-2",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-2",children:[],level:4}],level:3},{value:"Genomic Regions for Structural Variants Example",id:"genomic-regions-for-structural-variants-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-3",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-3",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-3",children:[],level:4}],level:3},{value:"Mixing Small Variants and Genomic Regions",id:"mixing-small-variants-and-genomic-regions",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-4",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-4",children:[],level:4},{value:"Investigate the 
Results",id:"investigate-the-results-4",children:[],level:4}],level:3}],level:2},{value:"Gene File Format",id:"gene-file-format",children:[{value:"Basic Gene Example",id:"basic-gene-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-5",children:[],level:4},{value:"Annotate with Nirvana",id:"annotate-with-nirvana-5",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-5",children:[],level:4}],level:3}],level:2},{value:"Customizing the Header",id:"customizing-the-header",children:[{value:"Title",id:"title",children:[],level:3},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:3},{value:"Matching Criteria",id:"matching-criteria",children:[],level:3},{value:"Categories",id:"categories",children:[],level:3},{value:"Descriptions",id:"descriptions",children:[{value:"Populations",id:"populations",children:[],level:4}],level:3},{value:"Data Types",id:"data-types",children:[],level:3}],level:2},{value:"Using SAUtils",id:"using-sautils",children:[{value:"Convert Variant File",id:"convert-variant-file",children:[],level:3},{value:"Convert Gene File",id:"convert-gene-file",children:[],level:3}],level:2}],m={toc:s},p="wrapper";function d(t){let{components:e,...a}=t;return(0,l.kt)(p,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"While the team tries to keep data sources up-to-date, you might want to start incorporate new annotations ahead of our update cycle. Another\ncommon use case involves protected health information (PHI). Custom annotations are a mechanism that enables both use cases."),(0,l.kt)("p",null,"Here are some examples of how our collaborators use custom annotations:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"associating context from both a patient-level and a patient cohort level with the variant annotations"),(0,l.kt)("li",{parentName:"ul"},"adding content that is licensed (e.g. 
HGMD) to the variant annotations")),(0,l.kt)("p",null,"At the moment, we have two different custom annotation file formats. One provides additional annotations to variants (both small variants and SVs)\nwhile the other caters to gene annotations."),(0,l.kt)("p",null,"In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data."),(0,l.kt)("p",null,"The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how\nNirvana should match the variants."),(0,l.kt)("p",null,"At Illumina, there are usually many components downstream of Nirvana that have to parse our annotations. If a customer provides a custom\nannotation, those downstream tools need to understand more about the data such as:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"data type (e.g. number, boolean, or a string)"),(0,l.kt)("li",{parentName:"ul"},"data category (e.g. is this an allele count, allele number, allele frequency, etc.)"),(0,l.kt)("li",{parentName:"ul"},"associated population (i.e. if this is an allele frequency)")),(0,l.kt)("p",null,"For each custom annotation, Nirvana uses this context to create a ",(0,l.kt)("a",{parentName:"p",href:"https://json-schema.org/"},"JSON schema")," that can be sent to downstream tools. 
If\na tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of ","[0, 1]","."),(0,l.kt)("h2",{id:"variant-file-format"},"Variant File Format"),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"File Format")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Nirvana expects plain text (or gzipped text) files. Using tools like Excel can add extra characters that can break parsing. We highly recommend creating and modifying these files with plain text editor like Notepad, Notepad++ or Atom."))),(0,l.kt)("h3",{id:"basic-allele-frequency-example"},"Basic Allele Frequency Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Imagine that you want to create a basic allele frequency custom annotation for small variants. 
If we visualized the tab-delimited file\n(TSV), it would look something like this:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency")),(0,l.kt)("tr",{parentName:"tbody"}
,(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over the header and discuss the contents:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"title")," indicates the name of the JSON key"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"assembly")," indicates that this data is only valid for 
",(0,l.kt)("inlineCode",{parentName:"li"},"GRCh38"),"."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"matchVariantsBy")," indicates how annotations should be matched and reported. In this case annotations will be matched and reported by allele."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"categories")," provides hints to downstream tools on how they might want to treat the data. In this case, we indicate that it's an allele frequency."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"descriptions")," are used in special circumstances to provide more context. Even though column 5 is called ",(0,l.kt)("inlineCode",{parentName:"li"},"allAf"),", it might not be clear to a\ndownstream tool that this means a global allele frequency using all sub-populations. In this case, ",(0,l.kt)("inlineCode",{parentName:"li"},"ALL")," indicates the intended population."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"type")," indicates to downstream tools the data type. 
Since allele frequencies are numbers, we'll write ",(0,l.kt)("inlineCode",{parentName:"li"},"number")," in this column.")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Reference Base Checking")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Nirvana validates all the reference bases in a custom annotation. If a variant or genomic region is specified that has the wrong reference base, an error will be produced."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"The variants within each chromosome must be sorted by genomic position."))),(0,l.kt)("h4",{id:"convert-to-nirvana-format"},"Convert to Nirvana 
Format"),(0,l.kt)("p",null,"First we need to convert the TSV file to Nirvana's native file format and let's put that file in a new directory called CA:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"$ mkdir CA\n$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA\n---------------------------------------------------------------------------\nSAUtils (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nChromosome 16 completed in 00:00:00.1\nChromosome 19 completed in 00:00:00.0\n\nTime: 00:00:00.2\n")),(0,l.kt)("h4",{id:"annotate-with-nirvana"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's annotate the following VCF (notice that it's one of the variants that we have in our custom annotation):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 68801894 . G A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,"Since Nirvana can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to\nthe normal Nirvana command-line."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash",metastring:"{3}","{3}":!0},"$ dotnet bin/Release/netcoreapp2.1/Nirvana.dll -c Data/Cache/GRCh38/Both \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \\\n --sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA\n---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.8\nSA Position Scan 00:00:00.0 19\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr16 00:00:00.2 00:00:01.3 1\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:01.9 25.5 %\nPreload 00:00:00.2 3.3 %\nAnnotation 00:00:01.3 18.2 %\n\nTime: 00:00:06.3\n")),(0,l.kt)("h4",{id:"investigate-the-results"},"Investigate the Results"),(0,l.kt)("p",null,"We would expect the following data to show up in our JSON output file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-16}","{12-16}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n 
"altAllele": "A",\n "allAf": 7e-06\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"Nirvana preserves up to 6 decimal places for allele frequency data."),(0,l.kt)("h3",{id:"categories--descriptions-example"},"Categories & Descriptions Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-1"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Building on the previous example, we can add other types of annotations like predictions and general notes."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 6"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 
7"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,l.kt)("td",{parentName:"tr",align:"left"},"pathogenicity"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr"
,align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579"),(0,l.kt)("td",{parentName:"tr",align:"left"},"P"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569"),(0,l.kt)("td",{parentName:"tr",align:"left"},"LP"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in case 
123")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource2.tsv"},"the full TSV file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Placeholders")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). 
While\nNirvana also accepts empty columns in the TSV file, we use them in these examples to promote readability."))),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 6")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"pathogenicity")," which uses the ",(0,l.kt)("inlineCode",{parentName:"li"},"Prediction")," category. When using this category, Nirvana will\nvalidate to make\nsure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic)."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 7")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes")," and it doesn't have a category or description. We're just going to use it to add some internal\nnotes.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-1"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the\nalternate allele (allele-specific match):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 23603511 . TG T . . .\n16 68801894 . G A . . .\n19 11107436 . G C . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA2.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-1"},"Investigate the Results"),(0,l.kt)("p",null,"Because we specified ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," in our custom annotation file, only the middle variant will get an annotation:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-18}","{12-18}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123"\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA2.json.gz"},"the full JSON file"),"."),(0,l.kt)("h4",{id:"using-positional-matches"},"Using Positional Matches"),(0,l.kt)("p",null,"What would happen if we changed to ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position"),"? Two things will happen. 
First, our positional variants will now match:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-17}","{12-17}":!0},' "variants": [\n {\n "vid": "16-23603511-TG-T",\n "chromosome": "16",\n "begin": 23603512,\n "end": 23603512,\n "refAllele": "G",\n "altAllele": "-",\n "variantType": "deletion",\n "hgvsg": "NC_000016.10:g.23603512delG",\n "MyDataSource": [\n {\n "refAllele": "GA",\n "altAllele": "-",\n "allAf": 7e-06,\n "pathogenicity": "P"\n }\n ],\n "clinvar": [\n')),(0,l.kt)("p",null,"In addition, you will now see an extra flag for our allele-specific variant:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-20}","{12-20}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": [\n {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123",\n "isAlleleSpecific": true\n }\n ],\n "clinvar": [\n')),(0,l.kt)("h3",{id:"genomic-region-example"},"Genomic Region Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-2"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. 
Consider the following example:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0
,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"20000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"70000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Lots of false positives in this region")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource3.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes"),". In essence, it looks exactly like column 7 from our previous example."),(0,l.kt)("li",{parentName:"ul"},"The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.")),(0,l.kt)("p",null,"In the previous example we learned about positional matching vs allele-specific matching. 
For genomic regions, ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position")," produce\nthe same result."),(0,l.kt)("h4",{id:"annotate-with-nirvana-2"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use the same VCF file as our previous example."),(0,l.kt)("h4",{id:"investigate-the-results-2"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 23603511,\n "refAllele": "TG",\n "altAlleles": [\n "T"\n ],\n "cytogeneticBand": "16p12.2",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA3.json.gz"},"the full JSON file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Reciprocal & Annotation 
Overlap")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For all intervals, Nirvana internally calculates two overlaps: a ",(0,l.kt)("strong",{parentName:"p"},"variant overlap")," and an ",(0,l.kt)("strong",{parentName:"p"},"annotation overlap"),". Variant overlap is the percentage of the variant's length that is\noverlapped. Annotation overlap is the percentage of the annotation's length that is overlap."),(0,l.kt)("p",{parentName:"div"},(0,l.kt)("strong",{parentName:"p"},"Reciprocal overlap")," is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is one 1 bp, both overlaps will be pretty close to 0."))),(0,l.kt)("p",null,"We will also see this annotation for the other variant on chr16:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 68801894,\n "refAllele": "G",\n "altAlleles": [\n "A"\n ],\n "cytogeneticBand": "16q22.1",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("h3",{id:"genomic-regions-for-structural-variants-example"},"Genomic Regions for Structural Variants Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-3"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To force Nirvana to match regions only to other SVs, use the ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=sv")," option in the header. 
Here is an example:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=sv"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{par
entName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"20000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"70000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Lots of false positives in this region")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource6.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"The main difference is the header field ",(0,l.kt)("inlineCode",{parentName:"li"},"#matchVariantsBy=sv")," which indicates that only structural variants that overlap these genomic regions will receive annotations.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-3"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file. It contains the first variant from the previous file and a structural variant deletion- both of which overlap the given genomic region."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 23603511 . TG T . . .\n16 68801894 . G . . 
END=73683789;SVTYPE=DEL\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA6.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-3"},"Investigate the Results"),(0,l.kt)("p",null,"Note that this time, ",(0,l.kt)("inlineCode",{parentName:"p"},"MyDataSource")," only showed up for the ",(0,l.kt)("inlineCode",{parentName:"p"},"")," and not the deletion ",(0,l.kt)("inlineCode",{parentName:"p"},"16-23603511-TG-T"),"."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{21-29}","{21-29}":!0},' {\n "chromosome": "16",\n "position": 23603511,\n "refAllele": "TG",\n "altAlleles": [\n "T"\n ],\n "cytogeneticBand": "16p12.2",\n "variants": [\n ...\n ...\n {\n "chromosome": "16",\n "position": 68801894,\n "svEnd": 73683789,\n "refAllele": "G",\n "altAlleles": [\n ""\n ],\n "cytogeneticBand": "16q22.1-q22.3",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0.02396,\n "annotationOverlap": 0.02396\n }\n ],\n "variants": [\n\n')),(0,l.kt)("h3",{id:"mixing-small-variants-and-genomic-regions"},"Mixing Small Variants and Genomic Regions"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-4"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions. 
Let's create a file that contains both:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 6"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."
),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:
"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval #1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"<","DEL",">"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval #2")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr22"),(0,l.kt)("td",{parentName:"tr",align:"left"},"12370388"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T[chr22:12370729["),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"Known false-positive")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource4.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 4")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"REF")," field. Exception for the case listed below, this is only used by small variants or translocation breakends."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"END")," field. This is only used by genomic regions."),(0,l.kt)("li",{parentName:"ul"},"There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? Interval #2 has ",(0,l.kt)("strong",{parentName:"li"},"a symbolic allele in the ALT column"),". 
When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). When Nirvana matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.")),(0,l.kt)("h4",{id:"annotate-with-nirvana-4"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a new VCF file to study how matching works for intervals #1 and #2:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n21 10510818 . C . . END=10699435;SVTYPE=DUP\n22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA3.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,'The first variant is similar to the custom annotation labelled "interval #2". Position 10510818 is the padding base, so it effectively starts at position 10510819.'),(0,l.kt)("h4",{id:"investigate-the-results-4"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-26}","{11-26}":!0},' "positions": [\n {\n "chromosome": "21",\n "position": 10510818,\n "svEnd": 10699435,\n "refAllele": "C",\n "altAlleles": [\n ""\n ],\n "cytogeneticBand": "21p11.2",\n "MyDataSource": [\n {\n "start": 10510818,\n "end": 10699435,\n "notes": "Interval #1",\n "reciprocalOverlap": 0.99999,\n "annotationOverlap": 0.99999\n },\n {\n "start": 10510819,\n "end": 10699435,\n "notes": "Interval #2",\n "reciprocalOverlap": 1,\n "annotationOverlap": 1\n }\n ],\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA4.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. 
Interval #1 technically starts 1 bp earlier, so its overlap 99.9%."),(0,l.kt)("p",null,"Further down the JSON file, we find the annotated translocation breakend:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-15}","{11-15}":!0},' "variants": [\n {\n "vid": "22-12370388-T-T[chr22:12370729[",\n "chromosome": "22",\n "begin": 12370388,\n "end": 12370388,\n "isStructuralVariant": true,\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "variantType": "translocation_breakend",\n "MyDataSource": {\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "notes": "Known false-positive"\n }\n }\n')),(0,l.kt)("h2",{id:"gene-file-format"},"Gene File Format"),(0,l.kt)("h3",{id:"basic-gene-example"},"Basic Gene Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-5"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions, however, sometimes we would like to add custom gene annotations. 
The gene custom annotation file format\nlooks slightly different:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#geneSymbol"),(0,l.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,l.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TP53"),(0,l.kt)("td",{parentName:"tr",align:"left"},"7157"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colorectal cancer, hereditary nonpolyposis, type 
5"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KRAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ENSG00000133703"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mismatch repair cancer syndrome"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in cohort 123")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/MyDataSource5.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 2")," has the ",(0,l.kt)("inlineCode",{parentName:"li"},"geneId")," field. This can be either an ",(0,l.kt)("strong",{parentName:"li"},"Entrez Gene ID")," or an ",(0,l.kt)("strong",{parentName:"li"},"Ensembl ID"),".")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Gene Symbols")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Nirvana uses the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneId")," to match genes rather than the gene symbol. 
However, to\nmake the custom annotation files easier to read, we've included the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneSymbol")," column as well."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unknown Gene IDs")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"When Nirvana parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Nirvana. In such a case, Nirvana will display an error showing all the\nunrecognized gene IDs."))),(0,l.kt)("h4",{id:"annotate-with-nirvana-5"},"Annotate with Nirvana"),(0,l.kt)("p",null,"Let's use a VCF file that contain variants in TP53 and KRAS:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n12 25227255 . A T . . .\n17 7675074 . C A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA4.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-5"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{24-27}","{24-27}":!0},' "genes": [\n {\n "name": "KRAS",\n "clingenGeneValidity": [\n {\n "diseaseId": "MONDO_0009026",\n "disease": "Costello syndrome",\n "classification": "disputed",\n "classificationDate": "2018-07-24"\n }\n ],\n "clingenDosageSensitivityMap": {\n "haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"\n },\n "gnomAD": {\n "pLi": 0.000788,\n "pRec": 0.789,\n "pNull": 0.21,\n "synZ": 0.336,\n "misZ": 2.32,\n "loeuf": 1.24\n },\n "MyDataSource": {\n "phenotype": "Mismatch repair cancer syndrome",\n "notes": "Seen in cohort 123"\n }\n },\n')),(0,l.kt)("p",null,"This is the abbreviated output for KRAS. Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestCA5.json.gz"},"the full JSON file")," if you want to see the complete KRAS entry."),(0,l.kt)("h2",{id:"customizing-the-header"},"Customizing the Header"),(0,l.kt)("h3",{id:"title"},"Title"),(0,l.kt)("p",null,"For the title, you can provide any string that hasn't already been used. 
The title should be unique."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Make sure that the title does not conflict with other keys in the JSON file."))),(0,l.kt)("p",null,"For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"vid"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"transcripts"),", etc.. The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clinvar")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"gnomad"),"."),(0,l.kt)("p",null,"For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"svLength"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"cytogeneticBand"),", etc. 
The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clingen")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"dgv"),"."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Care should be taken not to annotate using multiple custom annotations that all use the same title."))),(0,l.kt)("h3",{id:"genome-assemblies"},"Genome Assemblies"),(0,l.kt)("p",null,"The following genome assemblies can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"GRCh37"),(0,l.kt)("li",{parentName:"ul"},"GRCh38")),(0,l.kt)("h3",{id:"matching-criteria"},"Matching Criteria"),(0,l.kt)("p",null,"The matching criteria instructs how Nirvana should match a VCF variant to the custom annotation."),(0,l.kt)("p",null,"The following matching criteria can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"allele")," - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"gnomAD")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"position")," - use this when you want positional matches. 
This is commonly used with disease phenotype data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"ClinVar")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"sv")," - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline\ncopy number intervals along the genome.")),(0,l.kt)("h3",{id:"categories"},"Categories"),(0,l.kt)("p",null,"Categories are not used by Nirvana, but are often used by downstream tools. Categories provide hints for how those tools should filter or display\nthe annotation data."),(0,l.kt)("p",null,"When a category is specified, Nirvana will provide additional validation for those fields. The following table describes each category:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Category"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Validation"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele counts for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleNumber"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele numbers for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele frequencies for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations 
below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ACMG-style pathogenicity classifications"),(0,l.kt)("td",{parentName:"tr",align:"left"},"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"benign")," (B)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely benign")," (LB)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"VUS"),(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely pathogenic")," (LP)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"pathogenic")," (P)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free text that signals downstream tools to add the column to the filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 20 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free-text description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 100 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Identifier"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any ID"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 50 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"HomozygousCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"count of homozygous individuals for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Score"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any score value"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Any double-precision floating point 
number")))),(0,l.kt)("h3",{id:"descriptions"},"Descriptions"),(0,l.kt)("p",null,"Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations."),(0,l.kt)("h4",{id:"populations"},"Populations"),(0,l.kt)("p",null,"The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Super-population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ACB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African Caribbeans in Barbados")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"All populations")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ad Mixed American")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASJ"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ashkenazi Jewish")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASW"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Americans of 
African Ancestry in SW USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"BEB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Bengali from Bangladesh")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CDX"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Chinese Dai in Xishuangbanna, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CEU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Utah Residents (CEPH) with Northern and Western European Ancestry")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Han Chinese in Beijing, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Southern Han Chinese")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CLM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colombians from Medellin, Colombia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"East Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ESN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Esan in 
Nigeria")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"FIN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Finnish in Finland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GBR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"British in England and Scotland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GIH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gujarati Indian from Houston, Texas")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GWD"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gambian in Western Divisions in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"IBS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Iberian population in Spain")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ITU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Indian Telugu from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"JPT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Japanese in Tokyo, Japan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KHV"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Kinh in Ho Chi Minh City, 
Vietnam")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"LWK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Luhya in Webuye, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MAG"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mandinka in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MKK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Maasai in Kinyawa, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MSL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mende in Sierra Leone")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MXL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mexican Ancestry from Los Angeles, USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"NFE"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European (Non-Finnish)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Other")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PEL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Peruvians from Lima, Peru")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PJL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Punjabi from Lahore, 
Pakistan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Puerto Ricans from Puerto Rico")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"South Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"STU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Sri Lankan Tamil from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TSI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Toscani in Italia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"YRI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Yoruba in Ibadan, Nigeria")))),(0,l.kt)("h3",{id:"data-types"},"Data Types"),(0,l.kt)("p",null,"Each custom annotation can be one of the following data types:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"bool")," - true or false"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"number")," - any integer or floating-point number"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"string")," - text")),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 
1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For boolean variables, only keys with a ",(0,l.kt)("inlineCode",{parentName:"p"},"true")," value will be output to the JSON object."))),(0,l.kt)("h2",{id:"using-sautils"},"Using SAUtils"),(0,l.kt)("p",null,"Nirvana includes a tool called ",(0,l.kt)("inlineCode",{parentName:"p"},"SAUtils")," that converts various data sources into Nirvana's native binary format. The sub-commands ",(0,l.kt)("inlineCode",{parentName:"p"},"customvar")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"customgene")," are used to specify a variant file or a gene file respectively."),(0,l.kt)("h3",{id:"convert-variant-file"},"Convert Variant File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,l.kt)("h3",{id:"convert-gene-file"},"Convert Gene File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll 
customgene \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -c Data/Cache \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the Nirvana cache path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/b6f92e46.cdad25a5.js b/assets/js/b6f92e46.cdad25a5.js deleted file mode 100644 index 80c86e0e..00000000 --- a/assets/js/b6f92e46.cdad25a5.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2167,9491],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>h});var o=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function a(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);t&&(o=o.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,o)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);for(o=0;o=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=o.createContext({}),p=function(e){var t=o.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=p(e.components);return o.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return o.createElement(o.Fragment,{},t)}},m=o.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,a=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=p(n),m=r,h=d["".concat(s,".").concat(m)]||d[m]||u[m]||a;return n?o.createElement(h,i(i({ref:t},c),{},{components:n})):o.createElement(h,i({ref:t},c))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var a=n.length,i=new Array(a);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,i[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>d,frontMatter:()=>a,metadata:()=>l,toc:()=>s});var o=n(87462),r=(n(67294),n(3905));const a={},i=void 0,l={unversionedId:"data-sources/phylop-json",id:"version-3.18/data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/phylop-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],p={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,o.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] 
\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}d.isMDXComponent=!0},64383:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>p});var o=n(87462),r=(n(67294),n(3905)),a=n(22571);const i={title:"PhyloP"},l=void 0,s={unversionedId:"data-sources/phylop",id:"version-3.18/data-sources/phylop",title:"PhyloP",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/phylop.mdx",sourceDirName:"data-sources",slug:"/data-sources/phylop",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/phylop",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/phylop.mdx",tags:[],version:"3.18",frontMatter:{title:"PhyloP"},sidebar:"docs",previous:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/omim"},next:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/primate-ai"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"WigFix File",id:"wigfix-file",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:p},d="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,o.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"PhyloP (phylogenetic p-values) conservation scores are obtained from the ","[PHAST 
package]"," (",(0,r.kt)("a",{parentName:"p",href:"http://compgen.bscb.cornell.edu/phast/"},"http://compgen.bscb.cornell.edu/phast/"),") for multiple alignments of vertebrate genomes to the human genome. For GRCh38, the multiple alignments are against 19 mammals and for GRCh37, it is against 45 vertebrate genomes."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. ",(0,r.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. 
(",(0,r.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,r.kt)("h2",{id:"wigfix-file"},"WigFix File"),(0,r.kt)("p",null,"The data is provided in WigFix files which is a text file that provides conservation scores for contiguous intervals in the following format:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"fixedStep chrom=chr1 start=10918 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.064\nfixedStep chrom=chr1 start=34045 step=1\n0.111\n0.100\n0.111\n0.111\n0.100\n0.111\n0.111\n0.111\n0.100\n0.111\n-1.636\n")),(0,r.kt)("p",null,"We convert them to binary files with indexes for fast query. Note that these are scores for genomic positions and are reported only for SNVs."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,"GRCh37: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/"},"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/")),(0,r.kt)("p",null,"GRCh38: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/"},"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Unlike other supplemetary datasources, phyloP scores are reported in the variants section."),(0,r.kt)(a.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/b9e7e906.bf6253bd.js b/assets/js/b9e7e906.bf6253bd.js deleted file mode 100644 index a334ef78..00000000 --- a/assets/js/b9e7e906.bf6253bd.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1643],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var a=n(67294);function i(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),m=d(n),h=i,u=m["".concat(s,".").concat(h)]||m[h]||c[h]||r;return n?a.createElement(u,o(o({ref:t},p),{},{components:n})):a.createElement(u,o({ref:t},p))}));function u(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=h;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[m]="string"==typeof e?e:i,o[1]=l;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={title:"Mitochondrial Heteroplasmy"},o=void 0,l={unversionedId:"data-sources/mito-heteroplasmy",id:"version-3.16/data-sources/mito-heteroplasmy",title:"Mitochondrial 
Heteroplasmy",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/mito-heteroplasmy.md",sourceDirName:"data-sources",slug:"/data-sources/mito-heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mito-heteroplasmy",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/mito-heteroplasmy.md",tags:[],version:"3.16",frontMatter:{title:"Mitochondrial Heteroplasmy"},sidebar:"version-3.16/docs",previous:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/gnomad"},next:{title:"MITOMAP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mitomap"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"JSON File",id:"json-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Binning VRF Data",id:"binning-vrf-data",children:[],level:4},{value:"Pre-processing the Data",id:"pre-processing-the-data",children:[],level:4},{value:"Algorithm",id:"algorithm",children:[],level:4}],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Mitochondrial Heteroplasmy is an aggregate population data set that characterizes the amount of heteroplasmy observed for each variant. 
The latest version of this data set is based on re-processed 1000 Genomes Project data using the Illumina DRAGEN pipeline."),(0,i.kt)("h2",{id:"json-file"},"JSON File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{\n "T:C":{\n "ad":[\n 1,\n 1,\n 1,\n 1,\n 1,\n 1\n ],\n "allele_type":"alt",\n "vrf":[\n 0.002369668246445498,\n 0.0024937655860349127,\n 0.0016129032258064516,\n 0.0025188916876574307,\n 0.0022935779816513763,\n 0.002008032128514056\n ],\n "vrf_stats":{\n "kurtosis":38.889891511122556,\n "max":0.0025188916876574307,\n "mean":5.4052190471990743e-05,\n "min":0.0,\n "nobs":246,\n "skewness":6.346664692283075,\n "stdev":0.0003461416264750575,\n "variance":1.1981402557879823e-07\n }\n }\n}\n\n')),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"From the JSON file, we're mainly interested in the following keys:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"variant")," (i.e. 
",(0,i.kt)("inlineCode",{parentName:"li"},"T:C"),")"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"ad")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"vrf")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("inlineCode",{parentName:"li"},"nobs")," (number of observations)")),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Adjusting for null observations")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The ",(0,i.kt)("inlineCode",{parentName:"p"},"nobs")," value indicates how many observations were made. Ideally this would have been represented in the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," arrays, but it's left as an exercise for the reader."))),(0,i.kt)("h4",{id:"binning-vrf-data"},"Binning VRF Data"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," (variant read frequency) array in the JSON object above is paired with with the ",(0,i.kt)("inlineCode",{parentName:"p"},"ad")," array (allele depths) shown above."),(0,i.kt)("p",null,"The data in the JSON object has a crazy number of significant digits. This means that as the number of samples increase, this array will grow. 
To make this more future-proof, Nirvana bins everything according to 0.1% increments."),(0,i.kt)("p",null,"With the binned data, we end up having 775 distinct ",(0,i.kt)("inlineCode",{parentName:"p"},"vrf")," values in the entire JSON file. This also means that the variant with the largest number of VRFs would originally have 246 entries, but due to binning this will decrease to 143."),(0,i.kt)("h4",{id:"pre-processing-the-data"},"Pre-processing the Data"),(0,i.kt)("p",null,"The JSON file is converted into a small TSV file that is ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/blob/main/MitoHeteroplasmy/Resources/MitoHeteroplasmy.tsv.gz"},"embedded in Nirvana"),". Here is an example of the TSV file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS REF ALT VRF_BINS VRF_COUNTS\nchrM 1 G . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\nchrM 2 A . 0.981,0.987,0.988,0.989,0.99,0.991,0.992,0.993,0.994,0.995,0.996,0.997,0.998,0.999 1,2,2,4,7,8,11,19,43,60,48,64,499,1736\n")),(0,i.kt)("h4",{id:"algorithm"},"Algorithm"),(0,i.kt)("p",null,"Nirvana will calculate mitochondrial heteroplasmy data for every sample in the VCF. 
Using the computed VRF for each sample, we compute where in the empirical mitochondrial heteroplasmy distribution that VRF occurs and express that as a percentile."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Percentiles")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana uses the ",(0,i.kt)("a",{parentName:"p",href:"https://en.wikipedia.org/wiki/Percentile"},"statistical definition of percentile")," (indicating the value below which a given percentage of observations in a group of observations falls). 
Unless the sample's VRF is higher than all the VRFs represented in the distribution, the range will be [0, 1)."))),(0,i.kt)("h2",{id:"download-url"},"Download URL"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unavailable")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The original data set is only available internally at Illumina at the moment."))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{14-17}","{14-17}":!0},'"samples":[\n {\n "genotype":"0/1",\n "variantFrequencies":[\n 0.333,\n 0.5\n ],\n ],\n "alleleDepths":[\n 10,\n 20,\n 30\n ],\n "heteroplasmyPercentile":[\n 23.13,\n 12.65\n ]\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"heteroplasmyPercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"one percentile for each variant frequency (each alternate allele)")))))}m.isMDXComponent=!0}}]); \ No newline at 
end of file diff --git a/assets/js/ba2982bf.8adaf04d.js b/assets/js/ba2982bf.8adaf04d.js new file mode 100644 index 00000000..f89679fd --- /dev/null +++ b/assets/js/ba2982bf.8adaf04d.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2038,4899],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>D});var a=n(7294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=s(n),u=r,D=d["".concat(p,".").concat(u)]||d[u]||m[u]||i;return n?a.createElement(D,o(o({ref:t},c),{},{components:n})):a.createElement(D,o({ref:t},c))}));function D(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in t)hasOwnProperty.call(t,p)&&(l[p]=t[p]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(7462),r=(n(7294),n(3905));const i={},o=void 
0,l={unversionedId:"data-sources/splice-ai-json",id:"data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/splice-ai-json.md",tags:[],version:"current",frontMatter:{}},p=[],s={toc:p},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")))))}d.isMDXComponent=!0},8295:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>p,toc:()=>s});var a=n(7462),r=(n(7294),n(3905)),i=n(9838);const o={title:"Splice AI"},l=void 0,p={unversionedId:"data-sources/splice-ai",id:"data-sources/splice-ai",title:"Splice AI",description:"Overview",source:"@site/docs/data-sources/splice-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/splice-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/splice-ai.mdx",tags:[],version:"current",frontMatter:{title:"Splice AI"},sidebar:"docs",previous:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel"},next:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Filtering",id:"filtering",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:s},d="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 
16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. ",(0,r.kt)("em",{parentName:"p"},"Cell"),", ",(0,r.kt)("strong",{parentName:"p"},"176")," (3) (2019), pp. 535-548 e24"))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Professional data source")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"This is a Professional data source and is not available freely. Please contact ",(0,r.kt)("a",{parentName:"p",href:"mailto:annotation_support@illumina.com"},"annotation_support@illumina.com")," if you would like to obtain it."))),(0,r.kt)("h2",{id:"vcf-file"},"VCF File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##fileformat=VCFv4.0\n##assembly=GRCh37/hg19\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n#CHROM POS ID REF ALT QUAL FILTER INFO\n10 92946 . C T . . 
SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35\n10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1\n10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21\n10 92947 . A C . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34\n10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34\n10 92947 . A G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32\n')),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the VCF file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AG")," - \u0394 score (acceptor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AL")," - \u0394 score (acceptor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DG")," - \u0394 score (donor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DL")," - \u0394 score (donor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AG")," - \u0394 position (acceptor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AL")," - \u0394 position (acceptor loss) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DG")," - \u0394 position (donor gain) relative to the variant 
position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DL")," - \u0394 position (donor loss) relative to the variant position")),(0,r.kt)("p",null,"The Splice AI team suggests the following interpretation for the scores:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Range"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Confidence"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Pathogenicity"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0 \u2264 x < 0.1"),(0,r.kt)("td",{parentName:"tr",align:"left"},"low"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely benign")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0.1 \u2264 x \u2264 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"medium"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely pathogenic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"x > 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"high"),(0,r.kt)("td",{parentName:"tr",align:"left"},"pathogenic")))),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"filtering"},"Filtering"),(0,r.kt)("p",null,"Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value. I.e. observing low splice scores in intergenic regions. Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed."),(0,r.kt)("p",null,"As a result, Illumina Connected Annotations filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. 
For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/5u6ThOblecrh"},"https://basespace.illumina.com/s/5u6ThOblecrh")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/ba2982bf.d6c73718.js b/assets/js/ba2982bf.d6c73718.js deleted file mode 100644 index 67532964..00000000 --- a/assets/js/ba2982bf.d6c73718.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2038,4899],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>D});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=s(n),u=r,D=d["".concat(p,".").concat(u)]||d[u]||m[u]||i;return 
n?a.createElement(D,o(o({ref:t},c),{},{components:n})):a.createElement(D,o({ref:t},c))}));function D(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in t)hasOwnProperty.call(t,p)&&(l[p]=t[p]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/splice-ai-json",id:"data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/splice-ai-json.md",tags:[],version:"current",frontMatter:{}},p=[],s={toc:p},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"HGNC gene 
symbol")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")))))}d.isMDXComponent=!0},48295:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>p,toc:()=>s});var a=n(87462),r=(n(67294),n(3905)),i=n(99838);const o={title:"Splice AI"},l=void 0,p={unversionedId:"data-sources/splice-ai",id:"data-sources/splice-ai",title:"Splice AI",description:"Overview",source:"@site/docs/data-sources/splice-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/splice-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/splice-ai.mdx",tags:[],version:"current",frontMatter:{title:"Splice AI"},sidebar:"docs",previous:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel"},next:{title:"TOPMed",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Filtering",id:"filtering",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:s},d="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"SpliceAI, a 32-layer deep neural network, predicts splicing from a pre-mRNA sequence."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 
16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"K. Jaganathan, et al. Predicting splicing from primary sequence with deep learning. ",(0,r.kt)("em",{parentName:"p"},"Cell"),", ",(0,r.kt)("strong",{parentName:"p"},"176")," (3) (2019), pp. 535-548 e24"))),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Professional data source")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"This is a Professional data source and is not available freely. Please contact ",(0,r.kt)("a",{parentName:"p",href:"mailto:annotation_support@illumina.com"},"annotation_support@illumina.com")," if you would like to obtain it."))),(0,r.kt)("h2",{id:"vcf-file"},"VCF File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},'##fileformat=VCFv4.0\n##assembly=GRCh37/hg19\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n#CHROM POS ID REF ALT QUAL FILTER INFO\n10 92946 . C T . . 
SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0000;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-26;DP_AL=-10;DP_DG=3;DP_DL=35\n10 92946 . C G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0008;DS_AL=0.0000;DS_DG=0.0003;DS_DL=0.0000;DP_AG=34;DP_AL=-27;DP_DG=35;DP_DL=1\n10 92946 . C A . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-53;DS_AG=0.0004;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=-10;DP_AL=-48;DP_DG=35;DP_DL=-21\n10 92947 . A C . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=-49;DP_AL=-11;DP_DG=0;DP_DL=34\n10 92947 . A T . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0002;DS_AL=0.0000;DS_DG=0.0000;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=-22;DP_DL=34\n10 92947 . A G . . SYMBOL=TUBB8;STRAND=-;TYPE=E;DIST=-54;DS_AG=0.0006;DS_AL=0.0000;DS_DG=0.0001;DS_DL=0.0000;DP_AG=33;DP_AL=-11;DP_DG=34;DP_DL=32\n')),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the VCF file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AG")," - \u0394 score (acceptor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_AL")," - \u0394 score (acceptor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DG")," - \u0394 score (donor gain)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DS_DL")," - \u0394 score (donor loss)"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AG")," - \u0394 position (acceptor gain) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_AL")," - \u0394 position (acceptor loss) relative to the variant position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DG")," - \u0394 position (donor gain) relative to the variant 
position"),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"DP_DL")," - \u0394 position (donor loss) relative to the variant position")),(0,r.kt)("p",null,"The Splice AI team suggests the following interpretation for the scores:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},"Range"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Confidence"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Pathogenicity"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0 \u2264 x < 0.1"),(0,r.kt)("td",{parentName:"tr",align:"left"},"low"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely benign")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"0.1 \u2264 x \u2264 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"medium"),(0,r.kt)("td",{parentName:"tr",align:"left"},"likely pathogenic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"x > 0.5"),(0,r.kt)("td",{parentName:"tr",align:"left"},"high"),(0,r.kt)("td",{parentName:"tr",align:"left"},"pathogenic")))),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"filtering"},"Filtering"),(0,r.kt)("p",null,"Splice AI provides a comprehensive list of entries throughout the genome. However, many of the entries have little value. I.e. observing low splice scores in intergenic regions. Not only do these extra entries require more storage, but the unused content has a negative impact on annotation speed."),(0,r.kt)("p",null,"As a result, Illumina Connected Annotations filters out all the values in the low confidence tier except for regions within 15 bp of nascent splice sites. 
For those regions, we found it useful to see if Splice AI predicted an interruption of the splicing mechanism."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/5u6ThOblecrh"},"https://basespace.illumina.com/s/5u6ThOblecrh")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/ba77cd73.8cc1ebb1.js b/assets/js/ba77cd73.8cc1ebb1.js deleted file mode 100644 index 974bbd6b..00000000 --- a/assets/js/ba77cd73.8cc1ebb1.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4291],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},c=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),s=u(n),c=r,g=s["".concat(p,".").concat(c)]||s[c]||d[c]||l;return 
n?a.createElement(g,o(o({ref:e},m),{},{components:n})):a.createElement(g,o({ref:e},m))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=c;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/1000Genomes-sv-json",id:"version-3.17/data-sources/1000Genomes-sv-json",title:"1000Genomes-sv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/1000Genomes-sv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-sv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/1000Genomes-sv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/1000Genomes-sv-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],u={toc:p},m="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":[\n {\n "chromosome":"1",\n "begin":1595369,\n "end":1612441,\n "variantType": "copy_number_variation",\n "id": "esv3635753;esv3635754;esv3635755;esv3635756;esv3635757",\n "allAn": 5008,\n "allAc": 2702,\n "allAf": 0.539537,\n "afrAf": 0.6052,\n "amrAf": 0.3675,\n "eurAf": 0.5357,\n "easAf": 0.5368,\n "sasAf": 0.5797,\n "reciprocalOverlap": 0.07555\n 
}\n],\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"range: 0 - 1.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/bb06941c.2ceb66e9.js b/assets/js/bb06941c.2ceb66e9.js deleted file mode 100644 index a2d4d540..00000000 --- a/assets/js/bb06941c.2ceb66e9.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4692,2154],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),m=d(n),u=l,v=m["".concat(s,".").concat(u)]||m[u]||p[u]||r;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,i=new Array(r);i[0]=u;var o={};for(var s in 
t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:l,i[1]=o;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.16/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/dbsnp-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],d={toc:s},c="wrapper";function m(e){let{components:t,...n}=e;return(0,l.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,l.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,l.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,l.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}m.isMDXComponent=!0},50298:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),l=(n(67294),n(3905)),r=n(69841);const i={title:"dbSNP"},o=void 
0,s={unversionedId:"data-sources/dbsnp",id:"version-3.16/data-sources/dbsnp",title:"dbSNP",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/dbsnp.mdx",sourceDirName:"data-sources",slug:"/data-sources/dbsnp",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/dbsnp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/dbsnp.mdx",tags:[],version:"3.16",frontMatter:{title:"dbSNP"},sidebar:"version-3.16/docs",previous:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/cosmic"},next:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/fusioncatcher"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Global allele extraction",id:"global-allele-extraction",children:[],level:4},{value:"Equal Allele Frequency Example (2 alleles)",id:"equal-allele-frequency-example-2-alleles",children:[],level:4},{value:"Equal Allele Frequency Example (3 alleles)",id:"equal-allele-frequency-example-3-alleles",children:[],level:4},{value:"Equal Allele Frequency in Alternate Alleles",id:"equal-allele-frequency-in-alternate-alleles",children:[],level:4},{value:"Equal Allele Frequency Between Reference & Alternate Allele",id:"equal-allele-frequency-between-reference--alternate-allele",children:[],level:4}],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},m="wrapper";function p(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"dbSNP contains human single nucleotide variations, 
microsatellites, and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP\u2014Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. ",(0,l.kt)("em",{parentName:"p"},"Genome Res."),", ",(0,l.kt)("strong",{parentName:"p"},"9"),", 677\u2013679."))),(0,l.kt)("h2",{id:"vcf-file"},"VCF File"),(0,l.kt)("h3",{id:"example"},"Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 10177 rs367896724 A AC . . 
RS=367896724;RSPOS=10177;dbSNPBuildID=138; \\ \n SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \\\n VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \\\n TOPMED=0.76728147298674821,0.23271852701325178\n")),(0,l.kt)("h3",{id:"parsing"},"Parsing"),(0,l.kt)("p",null,"From the VCF file, we're mainly interested in the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"rsID")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"ID")," field"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"CAF")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"INFO")," field")),(0,l.kt)("h4",{id:"global-allele-extraction"},"Global allele extraction"),(0,l.kt)("p",null,"The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values). 
"),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: Global Major Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele."))),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: 
Global Minor Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily."))),(0,l.kt)("h4",{id:"equal-allele-frequency-example-2-alleles"},"Equal Allele Frequency Example (2 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C CAF=0.5,0.5\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and C to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-example-3-alleles"},"Equal Allele Frequency Example (3 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.33,0.33,0.33\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-in-alternate-alleles"},"Equal Allele Frequency in Alternate Alleles"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.4,0.4\n")),(0,l.kt)("p",null,"We will select C or T to be arbitrarily assigned to be the global major or global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-between-reference--alternate-allele"},"Equal Allele Frequency Between Reference & Alternate Allele"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.2,0.6\n")),(0,l.kt)("p",null,"We will select T to be the global major allele and C to be the global minor allele."),(0,l.kt)("h2",{id:"known-issues"},"Known Issues"),(0,l.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are multiple entries with different CAF values for the same allele, we use the first CAF value."))),(0,l.kt)("h2",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nih.gov/snp/organisms/"},"https://ftp.ncbi.nih.gov/snp/organisms/")),(0,l.kt)("h2",{id:"json-output"},"JSON Output"),(0,l.kt)(r.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/bd05f965.c385f9cc.js b/assets/js/bd05f965.c385f9cc.js deleted file mode 100644 index 87a7c953..00000000 --- a/assets/js/bd05f965.c385f9cc.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8493],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>k});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var 
l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var o=a.createContext({}),s=function(t){var e=a.useContext(o),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=s(t.components);return a.createElement(o.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,o=t.parentName,m=p(t,["components","mdxType","originalType","parentName"]),c=s(n),u=r,k=c["".concat(o,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(k,i(i({ref:e},m),{},{components:n})):a.createElement(k,i({ref:e},m))}));function k(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,i=new Array(l);i[0]=u;var p={};for(var o in e)hasOwnProperty.call(e,o)&&(p[o]=e[o]);p.originalType=t,p[c]="string"==typeof t?t:r,i[1]=p;for(var s=2;s{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>l,metadata:()=>p,toc:()=>o});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,p={unversionedId:"data-sources/clinvar-json",id:"version-3.14/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/clinvar-json.md",tags:[],version:"3.14",frontMatter:{}},o=[],s={toc:o},m="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},s,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n 
"significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely 
pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/bd1a7c92.c758c6ae.js b/assets/js/bd1a7c92.c758c6ae.js deleted file mode 100644 index 9f75a8f4..00000000 --- a/assets/js/bd1a7c92.c758c6ae.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3499],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),u=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=u(e.components);return r.createElement(p.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},s=r.forwardRef((function(e,t){var 
n=e.components,a=e.mdxType,l=e.originalType,p=e.parentName,c=i(e,["components","mdxType","originalType","parentName"]),d=u(n),s=a,f=d["".concat(p,".").concat(s)]||d[s]||m[s]||l;return n?r.createElement(f,o(o({ref:t},c),{},{components:n})):r.createElement(f,o({ref:t},c))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[d]="string"==typeof e?e:a,o[1]=i;for(var u=2;u{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/topmed-json",id:"version-3.18/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/topmed-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],u={toc:p},c="wrapper";function d(e){let{components:t,...n}=e;return(0,a.kt)(c,(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allHc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/beb1e3f1.65e51543.js b/assets/js/beb1e3f1.65e51543.js deleted file mode 100644 index cff2b08b..00000000 --- a/assets/js/beb1e3f1.65e51543.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7944,8660],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>g});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var 
n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var o=a.createContext({}),p=function(e){var t=a.useContext(o),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},c=function(e){var t=p(e.components);return a.createElement(o.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,o=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),d=p(n),u=i,g=d["".concat(o,".").concat(u)]||d[u]||m[u]||r;return n?a.createElement(g,l(l({ref:t},c),{},{components:n})):a.createElement(g,l({ref:t},c))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=u;var s={};for(var o in t)hasOwnProperty.call(t,o)&&(s[o]=t[o]);s.originalType=e,s[d]="string"==typeof e?e:i,l[1]=s;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>s,toc:()=>o});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,s={unversionedId:"data-sources/clinvar-json",id:"version-3.2.5/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.2.5/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/clinvar-json.md",tags:[],version:"3.2.5",frontMatter:{}},o=[],p={toc:o},c="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"RCV000030258.4",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no assertion provided"),(0,i.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,i.kt)("li",{parentName:"ul"},"practice guideline"),(0,i.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"unknown"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"germline"),(0,i.kt)("li",{parentName:"ul"},"somatic"),(0,i.kt)("li",{parentName:"ul"},"inherited"),(0,i.kt)("li",{parentName:"ul"},"paternal"),(0,i.kt)("li",{parentName:"ul"},"maternal"),(0,i.kt)("li",{parentName:"ul"},"de-novo"),(0,i.kt)("li",{parentName:"ul"},"biparental"),(0,i.kt)("li",{parentName:"ul"},"uniparental"),(0,i.kt)("li",{parentName:"ul"},"not-tested"),(0,i.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"uncertain significance"),(0,i.kt)("li",{parentName:"ul"},"not provided"),(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely 
pathogenic"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"drug response"),(0,i.kt)("li",{parentName:"ul"},"histocompatibility"),(0,i.kt)("li",{parentName:"ul"},"association"),(0,i.kt)("li",{parentName:"ul"},"risk factor"),(0,i.kt)("li",{parentName:"ul"},"protective"),(0,i.kt)("li",{parentName:"ul"},"affects"),(0,i.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,i.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}d.isMDXComponent=!0},87755:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(87462),i=(n(67294),n(3905)),r=n(86631);const l={title:"ClinVar"},s=void 0,o={unversionedId:"data-sources/clinvar",id:"version-3.2.5/data-sources/clinvar",title:"ClinVar",description:"Overview",source:"@site/versioned_docs/version-3.2.5/data-sources/clinvar.mdx",sourceDirName:"data-sources",slug:"/data-sources/clinvar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/clinvar",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/clinvar.mdx",tags:[],version:"3.2.5",frontMatter:{title:"ClinVar"},sidebar:"version-3.2.5/docs",previous:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/1000Genomes"},next:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/dbsnp"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"RCV File",id:"rcv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Parsing Significance",id:"parsing-significance",children:[],level:4}],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download 
URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:p},d="wrapper";function m(e){let{components:t,...l}=e;return(0,i.kt)(d,(0,a.Z)({},c,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", ",(0,i.kt)("strong",{parentName:"p"},"46"),", Issue D1, 4 January 2018, Pages D1062\u2013D1067, 
",(0,i.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/nar/gkx1153"},"https://doi.org/10.1093/nar/gkx1153")))),(0,i.kt)("h2",{id:"rcv-file"},"RCV File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{target:"_blank",href:n(28075).Z},"a full RCV entry"),"."),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ID")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3}","{3}":!0},'\n \n \n\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"LastUpdatedDate")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},'\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{5}","{5}":!0},'\n \n \n no assertion criteria provided \n Pathogenic \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ReviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},'\n \n \n no assertion criteria provided \n Pathogenic \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Phenotypes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2-8}","{2-8}":!0},'\n \n \n \n Joubert syndrome 9\n \n \n \n\n')),(0,i.kt)("p",null,'We only use the field with Type="Preferred". 
Multiple phenotypes may be reported'),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Location and Variant Id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,5-12}","{3,5-12}":!0},'\n\n \n \n \n \n \n \n \n\n')),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"The variant position is extracted from the fields for their respective assemblies."),(0,i.kt)("li",{parentName:"ul"},"Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant."),(0,i.kt)("li",{parentName:"ul"},'For older records, since "start\' and "stop" fields are not always available, we use the "display_start" and "display_end" fields.'),(0,i.kt)("li",{parentName:"ul"},"If a required allele is not available, we extract it from the reference sequence."),(0,i.kt)("li",{parentName:"ul"},"Only variants having a dbSNP id are extracted."),(0,i.kt)("li",{parentName:"ul"},"Note that a ClinVar accession may have multiple variants associated with it (possible in different locations)"),(0,i.kt)("li",{parentName:"ul"},"VariantId is extracted from the MeasureSet attributes.")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"MedGen, OMIM, Orphanet IDs")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4-7}","{4-7}":!0},'\n \n \n \n \n \n \n \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"AlleleOrigins")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},"\n germline\n\n")),(0,i.kt)("p",null,"We only extract all Allele Origins from Submissions (SCV) entries."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"PubMedIds")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4,10,16,21}","{4,10,16,21}":!0},'\n \n \n 12114475\n \n \n \n LMM Criteria\n \n 24033266\n \n \n \n \n \n 9113933\n \n \n \n \n 23757202\n 
\n\n')),(0,i.kt)("p",null,"We only extract all Pubmed Ids from Submissions (SCV) entries."),(0,i.kt)("h4",{id:"parsing-significance"},"Parsing Significance"),(0,i.kt)("p",null,"Extracting significance(s) may involve parsing multiple fields. Take the following snippets into consideration."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,8,13-14}","{3,8,13-14}":!0},'\n no assertion criteria provided\n Pathogenic\n\n\n\n criteria provided, multiple submitters, no conflicts\n Pathogenic/Likely pathogenic\n\n\n\n no assertion criteria provided\n Conflicting interpretations of pathogenicity\n Pathogenic(1);Uncertain significance(1)\n\n')),(0,i.kt)("p",null,"Given the evidence, we converted the significance field into an array of strings which may be parsed out of the ",(0,i.kt)("inlineCode",{parentName:"p"},"Descriptions")," or ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," fields. "),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Varying Delimiters")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The delimiters in each field may vary. Currently, the delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Description")," are ",(0,i.kt)("inlineCode",{parentName:"p"},",")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),". 
The delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," are ",(0,i.kt)("inlineCode",{parentName:"p"},";")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),"."))),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"The XML file contains ~1k more entries (out of 162K) than the VCF file"),(0,i.kt)("li",{parentName:"ul"},"The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF"),(0,i.kt)("li",{parentName:"ul"},'The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H",\netc.) 
as their alternate allele')))),(0,i.kt)("h2",{id:"download-url"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz"},"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz")),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"JSON"}))}m.isMDXComponent=!0},28075:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/files/clinvar-rcv-example-4e0a2f2ac6c70acd0ce41410690b683b.xml"}}]); \ No newline at end of file diff --git a/assets/js/c252ba0e.006e420d.js b/assets/js/c252ba0e.006e420d.js deleted file mode 100644 index 0818ce43..00000000 --- a/assets/js/c252ba0e.006e420d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2970,833],{3905:(e,n,t)=>{t.d(n,{Zo:()=>d,kt:()=>h});var a=t(67294);function r(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},d=function(e){var n=c(e.components);return a.createElement(l.Provider,{value:n},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var 
t=e.components,r=e.mdxType,o=e.originalType,l=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),u=c(t),m=r,h=u["".concat(l,".").concat(m)]||u[m]||p[m]||o;return t?a.createElement(h,i(i({ref:n},d),{},{components:t})):a.createElement(h,i({ref:n},d))}));function h(e,n){var t=arguments,r=n&&n.mdxType;if("string"==typeof e||r){var o=t.length,i=new Array(o);i[0]=m;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[u]="string"==typeof e?e:r,i[1]=s;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>s,toc:()=>l});var a=t(87462),r=(t(67294),t(3905));const o={},i=void 0,s={unversionedId:"data-sources/amino-acid-conservation-json",id:"version-3.21/data-sources/amino-acid-conservation-json",title:"amino-acid-conservation-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/amino-acid-conservation-json.md",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/amino-acid-conservation-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/amino-acid-conservation-json.md",tags:[],version:"3.21",frontMatter:{}},l=[],c={toc:l},d="wrapper";function u(e){let{components:n,...t}=e;return(0,r.kt)(d,(0,a.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"aminoAcidConservation": {\n "scores": [0.34]\n} 
\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"aminoAcidConservation"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scores"),(0,r.kt)("td",{parentName:"tr",align:"center"},"object array of doubles"),(0,r.kt)("td",{parentName:"tr",align:"left"},"percent conserved with respect to human amino acid residue. Range: 0.01 - 1.00")))))}u.isMDXComponent=!0},80882:(e,n,t)=>{t.r(n),t.d(n,{contentTitle:()=>s,default:()=>p,frontMatter:()=>i,metadata:()=>l,toc:()=>c});var a=t(87462),r=(t(67294),t(3905)),o=t(60617);const i={title:"Amino Acid Conservation"},s=void 0,l={unversionedId:"data-sources/amino-acid-conservation",id:"version-3.21/data-sources/amino-acid-conservation",title:"Amino Acid Conservation",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/amino-acid-conservation.mdx",sourceDirName:"data-sources",slug:"/data-sources/amino-acid-conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/amino-acid-conservation",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/amino-acid-conservation.mdx",tags:[],version:"3.21",frontMatter:{title:"Amino Acid Conservation"},sidebar:"docs",previous:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/1000Genomes"},next:{title:"Cancer 
Hotspots",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cancer-hotspots"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"FASTA File",id:"fasta-file",children:[],level:2},{value:"Parsing FASTA",id:"parsing-fasta",children:[],level:2},{value:"Assigning scores to Nirvana transcripts",id:"assigning-scores-to-nirvana-transcripts",children:[{value:"GRCh37",id:"grch37",children:[],level:3},{value:"GRCh38",id:"grch38",children:[],level:3}],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:c},u="wrapper";function p(e){let{components:n,...t}=e;return(0,r.kt)(u,(0,a.Z)({},d,t,{components:n,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Amino acid conservation scores are obtained from multiple alignments of vertebrate exomes to the human ones. The score indicate the frequency with which a particular AA is observed in Humans."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. ",(0,r.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. 
(",(0,r.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,r.kt)("h2",{id:"fasta-file"},"FASTA File"),(0,r.kt)("p",null,"The exon alignments are provided in FASTA files as follows:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},">ENST00000641515.2_hg38_1_2 3 0 0 chr1:65565-65573+\nMKK\n>ENST00000641515.2_panTro4_1_2 3 0 0 chrUn_GL393541:146907-146915+\nMKK\n>ENST00000641515.2_gorGor3_1_2 3 0 0\n---\n>ENST00000641515.2_ponAbe2_1_2 3 0 0 chr15:99141417-99141425-\nMKK\n>ENST00000641515.2_hg38_2_2 324 0 0 chr1:69037-70008+\nVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLLHFFGGSEMVILIAMGFDRYIAICKPLHYTTIMCGNACVGIMAVTWGIGFLHSVSQLAFAVHLLFCGPNEVDSFYCDLPRVIKLACTDTYRLDIMVIANSGVLTVCSFVLLIISYTIILMTIQHRPLDKSSKALSTLTAHITVVLLFFGPCVFIYAWPFPIKSLDKFLAVFYSVITPLLNPIIYTLRNKDMKTAIRQLRKWDAHSSVKFZ\n>ENST00000641515.2_panTro4_2_2 324 0 0 chrUn_GL393541:151333-152303+\n")),(0,r.kt)("h2",{id:"parsing-fasta"},"Parsing FASTA"),(0,r.kt)("p",null,"For each Ensembl transcript, we will need to aggregate all the exons together for each of the 100 species. From there, we should get a full alignment that can be used to determine conservation. 
For example, for ENST00000641515.2 we have:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"Human (hg38) MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVITVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nChimp MKKVTAEAISWNESTSETNNSMVTEFIFLGLSDSQELQTFL-MLFFVFYGGIVFGNLLIVRIVVSDSHLHSPMYFLLANLSLIDLSLCSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGorilla ----------------------------------------------------------------------------------------------------------------------\nOrangutan MKKVTAEAISWNESTSKTNNSVVTEFIFLGLSDSQELQTFLFMLFFVFYGGIVFGNLLIVIIVVSDSHLHSPMYFLLANLSLIDLSLSSVTAPKMITDFFSQRKVISFKGCLVQIFLL\nGibbon ----------------------------------------------------------------------------------------------------------------------\nRhesus MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVVDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\nMacaque MKKVTEAAISWNESTSETNNSIVTEFIFLGLSDSQELQIFLFVLFLVFYGGIVFGNLLIVITVVSDSHLHSPMYLLLANLSVIDLSLSSVTAPKMITDFFSQRKAISFKGCLVQIFLL\n")),(0,r.kt)("p",null,"If we look at position 6, we see that humans have an Alanine (A) residue. This residue is shared by Chimp and Orangutan. However, Rhesus and Macaque have a Glutamic acid (E) residue at that position. Moreover, Gorilla and Gibbon don't even have data for that transcript.\nFor position 6, we would say that we have 43% conservation (3/7) since three organisms share the same residue as humans."),(0,r.kt)("h2",{id:"assigning-scores-to-nirvana-transcripts"},"Assigning scores to Nirvana transcripts"),(0,r.kt)("p",null,"The source FASTA file comes with Ensembl/UCSC transcript ids of the transcripts used for alignments. The Nirvana cache has RefSeq and Ensembl transcripts and our first attempt was to map the given Ensembl/UCSC ids to their equivalent RefSeq/Ensembl ids. This attempt was unsuccessful since UCSC Table Browser provided mapping without version numbers. 
So we proceeded as follows:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Take proteins which have a unique mapping (and hence one set of conservation scores). For ones that mapped to both ChrX and ChrY, we accepted the one from ChrX."),(0,r.kt)("li",{parentName:"ul"},"A Nirvana transcript having an exact peptide sequence match with a uniquely aligned protein is assigned the corresponding conservation scores.")),(0,r.kt)("p",null,"Unfortunately this left us with a very small number of transcripts having conservation scores."),(0,r.kt)("h3",{id:"grch37"},"GRCh37"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Source FASTA contained 41957 protein alignments."),(0,r.kt)("li",{parentName:"ul"},"38165 proteins had unique scores."),(0,r.kt)("li",{parentName:"ul"},"88 aligned proteins existed in Nirvana cache."),(0,r.kt)("li",{parentName:"ul"},"118 transcripts had conservation scores.")),(0,r.kt)("h3",{id:"grch38"},"GRCh38"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"Source FASTA contained 110024 protein alignments."),(0,r.kt)("li",{parentName:"ul"},"88961 proteins had unique scores."),(0,r.kt)("li",{parentName:"ul"},"11688 aligned proteins existed in Nirvana cache."),(0,r.kt)("li",{parentName:"ul"},"12098 transcripts had conservation scores.")),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,"GRCh37: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg19/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,r.kt)("p",null,"GRCh38: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz"},"http://hgdownload.soe.ucsc.edu/goldenPath/hg38/multiz100way/alignments/knownGene.exonAA.fa.gz")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Conservation scores are reported in the transcript section. 
One score is reported for each alt allele"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/c2bf78c4.498879d5.js b/assets/js/c2bf78c4.498879d5.js deleted file mode 100644 index 27a95c80..00000000 --- a/assets/js/c2bf78c4.498879d5.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4829],{3905:(e,t,r)=>{r.d(t,{Zo:()=>p,kt:()=>f});var n=r(67294);function a(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function o(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function c(e){for(var t=1;t=0||(a[r]=e[r]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(a[r]=e[r])}return a}var i=n.createContext({}),s=function(e){var t=n.useContext(i),r=t;return e&&(r="function"==typeof e?e(t):c(c({},t),e)),r},p=function(e){var t=s(e.components);return n.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},d=n.forwardRef((function(e,t){var r=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=s(r),d=a,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||o;return r?n.createElement(f,c(c({ref:t},p),{},{components:r})):n.createElement(f,c({ref:t},p))}));function f(e,t){var r=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=r.length,c=new Array(o);c[0]=d;var l={};for(var i in t)hasOwnProperty.call(t,i)&&(l[i]=t[i]);l.originalType=e,l[u]="string"==typeof e?e:a,c[1]=l;for(var 
s=2;s{r.r(t),r.d(t,{contentTitle:()=>c,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>i});var n=r(87462),a=(r(67294),r(3905));const o={},c=void 0,l={unversionedId:"data-sources/revel-json",id:"version-3.17/data-sources/revel-json",title:"revel-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/revel-json.md",sourceDirName:"data-sources",slug:"/data-sources/revel-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/revel-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/revel-json.md",tags:[],version:"3.17",frontMatter:{}},i=[],s={toc:i},p="wrapper";function u(e){let{components:t,...r}=e;return(0,a.kt)(p,(0,n.Z)({},s,r,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"revel":{ \n "score":0.027\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"score"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/c42e2541.f37fdc0d.js b/assets/js/c42e2541.f37fdc0d.js deleted file mode 100644 index 30b80baa..00000000 --- a/assets/js/c42e2541.f37fdc0d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7366],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>g});var a=n(67294);function r(t,e,n){return e in 
t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},s="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},m=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,c=i(t,["components","mdxType","originalType","parentName"]),s=u(n),m=r,g=s["".concat(p,".").concat(m)]||s[m]||d[m]||l;return n?a.createElement(g,o(o({ref:e},c),{},{components:n})):a.createElement(g,o({ref:e},c))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=m;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[s]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/clingen-json",id:"version-3.21/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.21/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clingen-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],u={toc:p},c="wrapper";function s(t){let{components:e,...n}=t;return(0,r.kt)(c,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clingen"),(0,r.kt)("td",{parentName:"tr",align:null},"object 
array"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"id"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. Alternatively a VID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,r.kt)("sup",null,"31"),"\xa0- 1). 
Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"validated"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,r.kt)("td",{parentName:"tr",align:null},"string array"),(0,r.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"curated benign"),(0,r.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"path gain"),(0,r.kt)("li",{parentName:"ul"},"path loss"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"uncertain")))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/c53c2ca7.a7188580.js b/assets/js/c53c2ca7.a7188580.js deleted file mode 100644 index 4e694935..00000000 --- a/assets/js/c53c2ca7.a7188580.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3476],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var p=r.createContext({}),u=function(e){var t=r.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},c=function(e){var t=u(e.components);return r.createElement(p.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},s=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,l=e.originalType,p=e.parentName,c=i(e,["components","mdxType","originalType","parentName"]),d=u(n),s=a,f=d["".concat(p,".").concat(s)]||d[s]||m[s]||l;return n?r.createElement(f,o(o({ref:t},c),{},{components:n})):r.createElement(f,o({ref:t},c))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i[d]="string"==typeof e?e:a,o[1]=i;for(var u=2;u{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/topmed-json",id:"version-3.17/data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.17/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/topmed-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],u={toc:p},c="wrapper";function d(e){let{components:t,...n}=e;return(0,a.kt)(c,(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n "failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. 
Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Nirvana)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allHc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/c53cda23.cc85cd1f.js b/assets/js/c53cda23.cc85cd1f.js deleted file mode 100644 index b3801465..00000000 --- a/assets/js/c53cda23.cc85cd1f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1064,9373],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var l=a.createContext({}),c=function(e){var t=a.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(l.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return 
a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),d=c(n),u=r,v=d["".concat(l,".").concat(u)]||d[u]||m[u]||i;return n?a.createElement(v,o(o({ref:t},p),{},{components:n})):a.createElement(v,o({ref:t},p))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s[d]="string"==typeof e?e:r,o[1]=s;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>s,toc:()=>l});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,s={unversionedId:"data-sources/primate-ai-json",id:"version-3.21/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/primate-ai-json.md",tags:[],version:"3.21",frontMatter:{}},l=[],c={toc:l},p="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}d.isMDXComponent=!0},8357:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>m,frontMatter:()=>o,metadata:()=>l,toc:()=>c});var a=n(87462),r=(n(67294),n(3905)),i=n(3775);const o={title:"Primate AI"},s=void 0,l={unversionedId:"data-sources/primate-ai",id:"version-3.21/data-sources/primate-ai",title:"Primate AI",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/primate-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/primate-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/primate-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/primate-ai.mdx",tags:[],version:"3.21",frontMatter:{title:"Primate AI"},sidebar:"docs",previous:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/phylop"},next:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/revel"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"TSV File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Converting UCSC 
IDs",id:"converting-ucsc-ids",children:[],level:3},{value:"Running the Pre-Processor",id:"running-the-pre-processor",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],p={toc:c},d="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations. The method is described in the publication:"),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. ",(0,r.kt)("em",{parentName:"p"},"Nat Genet")," ",(0,r.kt)("strong",{parentName:"p"},"50"),", 1161\u20131170 (2018). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/s41588-018-0167-z"},"https://doi.org/10.1038/s41588-018-0167-z")))),(0,r.kt)("h2",{id:"tsv-file"},"TSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr pos ref alt refAA altAA strand_1pos_0neg trinucleotide_context UCSC_gene ExAC_coverage primateDL_score\nchr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239\nchr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the TSV file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"primateDL_score"))),(0,r.kt)("p",null,"We also use ",(0,r.kt)("inlineCode",{parentName:"p"},"UCSC_gene")," to filter out variants that don't have matching gene models in Nirvana."),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"converting-ucsc-ids"},"Converting UCSC IDs"),(0,r.kt)("p",null,"Primate AI only provides UCSC IDs. 
As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs."),(0,r.kt)("p",null,"The following queries are used to download the conversions from UCSC:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},'mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv\n\nmysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \\\n hg19 > ucsc_ensembl.tsv\n')),(0,r.kt)("h3",{id:"running-the-pre-processor"},"Running the Pre-Processor"),(0,r.kt)("p",null,"The Primate AI pre-processor can be run as follows:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \\\n ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz\n")),(0,r.kt)("p",null,"During conversion, 0.5% of the UCSC Ids cannot be converted to either Entrez or Ensembl gene IDs. Once the gene IDs have been acquired, we check to see which are available in Nirvana."),(0,r.kt)("p",null,"The following Entrez Gene IDs were not found:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"399753\n401980\n504189\n504191\n100293534\n")),(0,r.kt)("p",null,"Here is the output from the pre-processor:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.\n- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.\n- loading UGA gene ID to gene dictionary... 103,277 genes loaded.\n- parsing Primate AI variants... 
70,121,953 variants parsed.\n \n# variants with unknown gene ID: 27,253 / 70,121,953\n# genes with unknown gene ID: 109 / 19,614\n \n# variants not in UGA: 2,036 / 70,121,953\n# genes not in UGA: 6 / 19,614\n")),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"The Primate AI data set provides raw scores, but the scores are biased according to gene context. I.e. a 0.4 means something different in ",(0,r.kt)("inlineCode",{parentName:"p"},"TP53")," than it does in ",(0,r.kt)("inlineCode",{parentName:"p"},"KRAS"),"."),(0,r.kt)("p",{parentName:"div"},"As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. 
According to their research, the 25",(0,r.kt)("sup",null,"th")," percentile is a good proxy for benign variants and the 75",(0,r.kt)("sup",null,"th")," percentile is a good proxy for pathogenic variants."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/cPgCSmecvhb4"},"https://basespace.illumina.com/s/cPgCSmecvhb4")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/c5f9e065.fab28387.js b/assets/js/c5f9e065.fab28387.js deleted file mode 100644 index 158d6ac6..00000000 --- a/assets/js/c5f9e065.fab28387.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2854,7859,8244],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>c});var a=n(67294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var 
n=e.components,l=e.mdxType,r=e.originalType,s=e.parentName,m=i(e,["components","mdxType","originalType","parentName"]),u=p(n),g=l,c=u["".concat(s,".").concat(g)]||u[g]||d[g]||r;return n?a.createElement(c,o(o({ref:t},m),{},{components:n})):a.createElement(c,o({ref:t},m))}));function c(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,o=new Array(r);o[0]=g;var i={};for(var s in t)hasOwnProperty.call(t,s)&&(i[s]=t[s]);i.originalType=e,i[u]="string"==typeof e?e:l,o[1]=i;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>i,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-lof-json",id:"version-3.17/data-sources/gnomad-lof-json",title:"gnomad-lof-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/gnomad-lof-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-lof-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/gnomad-lof-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/gnomad-lof-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],p={toc:s},m="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD":{ \n "pLi":1.00e0,\n "pNull":8.94e-40,\n "pRec":1.84e-16,\n "synZ":-8.44e-2,\n "misZ":5.96e-1,\n 
"loeuf":1.13e0\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction 
(LOEUF)")))))}u.isMDXComponent=!0},56249:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>i,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.17/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],p={toc:s},m="wrapper";function u(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n 
"failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. 
Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}u.isMDXComponent=!0},19348:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>g,frontMatter:()=>i,metadata:()=>p,toc:()=>m});var a=n(87462),l=(n(67294),n(3905)),r=n(56249),o=n(84094);const i={title:"gnomAD"},s=void 0,p={unversionedId:"data-sources/gnomad",id:"version-3.17/data-sources/gnomad",title:"gnomAD",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/gnomad.mdx",sourceDirName:"data-sources",slug:"/data-sources/gnomad",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/gnomad",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/gnomad.mdx",tags:[],version:"3.17",frontMatter:{title:"gnomAD"},sidebar:"version-3.17/docs",previous:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/fusioncatcher"},next:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mito-heteroplasmy"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF extraction",id:"vcf-extraction",children:[],level:3},{value:"Computation",id:"computation",children:[],level:3},{value:"Merging genomes and exomes",id:"merging-genomes-and-exomes",children:[],level:3},{value:"Filters",id:"filters",children:[],level:3},{value:"VCF download 
instructions",id:"vcf-download-instructions",children:[],level:3},{value:"JSON output",id:"json-output",children:[],level:3}],level:2},{value:"LoF Gene Metrics",id:"lof-gene-metrics",children:[{value:"Tab delimited file example",id:"tab-delimited-file-example",children:[],level:3},{value:"JSON key to TSV column mapping",id:"json-key-to-tsv-column-mapping",children:[],level:3},{value:"Gene symbol update",id:"gene-symbol-update",children:[],level:3},{value:"Conflict resolution",id:"conflict-resolution",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON output",id:"json-output-1",children:[],level:3}],level:2}],u={toc:m},d="wrapper";function g(e){let{components:t,...n}=e;return(0,l.kt)(d,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"The Genome Aggregation Database (",(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/"},"gnomAD"),") is a resource developed by an international coalition of investigators, with the goal of aggregating and harmonizing both exome and genome sequencing data from a wide variety of large-scale sequencing projects, and making summary data available for the wider scientific community."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Koch, L., 2020. 
Exploring human genomic diversity with gnomAD. ",(0,l.kt)("em",{parentName:"p"},"Nature Reviews Genetics"),", ",(0,l.kt)("strong",{parentName:"p"},"21(8)"),", pp.448-448."))),(0,l.kt)("h2",{id:"small-variants"},"Small Variants"),(0,l.kt)("h3",{id:"vcf-extraction"},"VCF extraction"),(0,l.kt)("p",null,"We currently extract the following info fields from gnomAD genome and exome VCF files:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("p",null,"We also extract the following extra fields from gnomAD exome VCF file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'##INFO=\n##INFO=\n##INFO=\n')),(0,l.kt)("h3",{id:"computation"},"Computation"),(0,l.kt)("p",null,"Using these, we compute the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Coverage"),(0,l.kt)("li",{parentName:"ul"},"Allele count, Homozygous count, allele number and allele frequencies for:",(0,l.kt)("ul",{parentName:"li"},(0,l.kt)("li",{parentName:"ul"},"Global population"),(0,l.kt)("li",{parentName:"ul"},"African/African Americans"),(0,l.kt)("li",{parentName:"ul"},"Admixed Americans"),(0,l.kt)("li",{parentName:"ul"},"Ashkenazi Jews"),(0,l.kt)("li",{parentName:"ul"},"East Asians"),(0,l.kt)("li",{parentName:"ul"},"Finnish"),(0,l.kt)("li",{parentName:"ul"},"Non-Finnish Europeans"),(0,l.kt)("li",{parentName:"ul"},"South Asian"),(0,l.kt)("li",{parentName:"ul"},"Others (population not assigned)"),(0,l.kt)("li",{parentName:"ul"},"Male"),(0,l.kt)("li",{parentName:"ul"},"Female"),(0,l.kt)("li",{parentName:"ul"},"Controls")))),(0,l.kt)("div",{className:"admonition admonition-tip alert 
alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Coverage = DP / AN. Frequencies are computed using AC/AN for each population."),(0,l.kt)("li",{parentName:"ul"},"Please note that currently there is no genome sequencing data of south asian (SAS) population available in gnomAD."),(0,l.kt)("li",{parentName:"ul"},"Allele Count, Homozygous count, allele number and allele frequencies for control groups are also provided for the global population.")))),(0,l.kt)("h3",{id:"merging-genomes-and-exomes"},"Merging genomes and exomes"),(0,l.kt)("p",null,"When merging the genomes and exomes, the allele counts and allele numbers will be summed across both of the data sets."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 
5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"info")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"For GRCh37, Nirvana currently uses gnomAD version 2.1 which contains both genomes and exomes data. Genomes and exomes data are merged in the output."),(0,l.kt)("li",{parentName:"ul"},"For GRCh38, Nirvana currently uses gnomAD version 3.0 which doesn't contain the exomes data. Therefore, only genomes data are presented in the output.")))),(0,l.kt)("h3",{id:"filters"},"Filters"),(0,l.kt)("p",null,"The following strategy will be used when there's a conflict in filter status:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"center"}),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes PASS")),(0,l.kt)("th",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"th"},"Genomes Filtered")))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes PASS")),(0,l.kt)("td",{parentName:"tr",align:"center"},"PASS"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use exome data")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"center"},(0,l.kt)("strong",{parentName:"td"},"Exomes Filtered")),(0,l.kt)("td",{parentName:"tr",align:"center"},"Only use genome data"),(0,l.kt)("td",{parentName:"tr",align:"center"},"Filtered")))),(0,l.kt)("h3",{id:"vcf-download-instructions"},"VCF download instructions"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://gnomad.broadinstitute.org/downloads"},"https://gnomad.broadinstitute.org/downloads")),(0,l.kt)("h3",{id:"json-output"},"JSON 
output"),(0,l.kt)(r.default,{mdxType:"JSONV"}),(0,l.kt)("h2",{id:"lof-gene-metrics"},"LoF Gene Metrics"),(0,l.kt)("h3",{id:"tab-delimited-file-example"},"Tab delimited file example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"gene transcript obs_mis exp_mis oe_mis mu_mis possible_mis obs_mis_pphen exp_mis_pphen oe_mis_pphen possible_mis_pphen obs_syn exp_syn oe_syn mu_syn possible_syn obs_lof mu_lof possible_lof exp_lof pLI pNull pRec oe_lof oe_syn_lower oe_syn_upper oe_mis_lower oe_mis_upper oe_lof_lower oe_lof_upper constraint_flag syn_zmis_z lof_z oe_lof_upper_rank oe_lof_upper_bin oe_lof_upper_bin_6 n_sites classic_caf max_af no_lofs obs_het_lof obs_hom_lof defined p exp_hom_lof classic_caf_afr classic_caf_amr classic_caf_asj classic_caf_eas classic_caf_fin classic_caf_nfe classic_caf_oth classic_caf_sas p_afr p_amr p_asj p_eas p_fin p_nfep_oth p_sas transcript_type gene_id transcript_level cds_length num_coding_exons gene_type gene_length exac_pLI exac_obs_lof exac_exp_lof exac_oe_lof brain_expression chromosome start_positionend_position\nMED13 ENST00000397786 871 1.1178e+03 7.7921e-01 5.5598e-05 14195 314 5.2975e+02 5.9273e-01 6708 422 3.8753e+02 1.0890e+00 1.9097e-05 4248 0 4.9203e-06 1257 9.8429e+01 1.0000e+00 8.9436e-40 1.8383e-16 0.0000e+00 1.0050e+00 1.1800e+00 7.3600e-01 8.2400e-01 0.0000e+00 3.0000e-02 -1.3765e+00 2.6232e+00 9.1935e+00 0 0 0 2 1.2058e-05 8.0492e-06 124782 3 0 124785 1.2021e-05 1.8031e-05 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2812e-05 8.8571e-06 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 0.0000e+00 9.2760e-05 8.8276e-06 0.0000e+00 0.0000e+00 protein_coding ENSG00000108510 2 6522 30 protein_coding 122678 1.0000e+00 0 6.4393e+01 0.0000e+00 NA 17 60019966 60142643\n")),(0,l.kt)("h3",{id:"json-key-to-tsv-column-mapping"},"JSON key to TSV column 
mapping"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"JSON key"),(0,l.kt)("th",{parentName:"tr",align:null},"TSV column"),(0,l.kt)("th",{parentName:"tr",align:null},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pLi"),(0,l.kt)("td",{parentName:"tr",align:null},"pLI"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of a single loss-of-function variant (like haploinsufficient genes, observed ~ 0.1*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"pNull"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being completely tolerant of loss of function variation (observed = expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"pRec"),(0,l.kt)("td",{parentName:"tr",align:null},"probability of being intolerant of two loss of function variants (like recessive genes, observed ~ 0.5*expected)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"synZ"),(0,l.kt)("td",{parentName:"tr",align:null},"syn_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected synonymous Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"misZ"),(0,l.kt)("td",{parentName:"tr",align:null},"mis_z"),(0,l.kt)("td",{parentName:"tr",align:null},"corrected missense Z score")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"loeuf"),(0,l.kt)("td",{parentName:"tr",align:null},"oe_lof_upper"),(0,l.kt)("td",{parentName:"tr",align:null},"loss of function observed/expected upper bound fraction (LOEUF)")))),(0,l.kt)("h3",{id:"gene-symbol-update"},"Gene symbol update"),(0,l.kt)("p",null,"The input file provides Ensembl 
gene ids for each entry. We observed that they were unique while gene symbols may be repeated (multiple lines may have the same gene symbol). Since Ensembl gene Ids are more stable, and Nirvana transcript cache data contains Ensembl gene ids, we use these ids to extract the gene symbols from the transcript cache. For example, if ENSG0001 has gene symbol GENE1 in the input but Nirvana cache say ENSG0001 maps to GENE2, we use GENE2 as the gene symbol for that entry."),(0,l.kt)("h3",{id:"conflict-resolution"},"Conflict resolution"),(0,l.kt)("p",null,"gnomAD uses Ensembl GeneID as unique identifiers in the ",(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"source file")," but Nirvana uses HGNC gene symbols. Multiple Ensembl GeneIDs can map to the same HGNC symbol and therefore may result is conflict."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"MDGA2 ENST00000426342 306 4.0043e+02 7.6419e-01 2.1096e-05 4724 78 1.6525e+02 4.7202e-01 1923 125 1.3737e+02 9.0993e-01 7.1973e-06 1413 4 2.0926e-06 453 3.8316e+01 9.9922e-01 8.6490e-12 7.8128e-04 1.0440e-01 7.8600e-01 1.0560e+00 6.9500e-01 8.4000e-01 5.0000e-02 2.3900e-01 8.2988e-01 1.6769e+00 5.1372e+00 1529 0 0 7 2.8103e-05 4.0317e-06 124784 7 0 124791 2.8047e-05 9.8167e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5391e-05 1.6672e-04 3.2680e-05 0.0000e+00 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 3.5308e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000139915 2 2181 13 protein_coding 835332 9.9322e-01 3 2.7833e+01 1.0779e-01 NA 14 47308826 48144157\nMDGA2 ENST00000439988 438 5.5311e+02 7.9189e-01 2.9490e-05 6608 105 2.0496e+02 5.1228e-01 2386 180 1.9491e+02 9.2351e-01 9.8371e-06 2048 11 2.8074e-06 627 5.1882e+01 6.6457e-01 5.5841e-10 3.3543e-01 2.1202e-01 8.1700e-01 1.0450e+00 7.3100e-01 8.5700e-01 1.3200e-01 3.5100e-01 8.3940e-01 1.7393e+00 5.2595e+00 2989 1 0 9 
3.6173e-05 4.0463e-06 124782 9 0 124791 3.6061e-05 1.6228e-04 6.4986e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4275e-05 1.6672e-04 3.2680e-05 6.4577e-05 2.8962e-05 0.0000e+00 0.0000e+00 0.0000e+00 4.4135e-05 1.6492e-04 3.2678e-05 protein_coding ENSG00000272781 3 3075 17 protein_coding 832866 NA NA NA NA NA 14 47311134 48143999\n")),(0,l.kt)("p",null,'In such cases, Nirvana chooses the entry with the smallest "LOEUF" value. The reason for choosing this value can be highlighted by the following table:'),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"right"},"LOEUF decile"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Haplo-insufficient"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Dominant"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Autosomal Recessive"),(0,l.kt)("th",{parentName:"tr",align:"right"},"Olfactory Genes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"0-10%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"104"),(0,l.kt)("td",{parentName:"tr",align:"right"},"140"),(0,l.kt)("td",{parentName:"tr",align:"right"},"36"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"10-20%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"47"),(0,l.kt)("td",{parentName:"tr",align:"right"},"128"),(0,l.kt)("td",{parentName:"tr",align:"right"},"72"),(0,l.kt)("td",{parentName:"tr",align:"right"},"1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"20-30%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"17"),(0,l.kt)("td",{parentName:"tr",align:"right"},"86"),(0,l.kt)("td",{parentName:"tr",align:"right"},"112"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"30-40%"),(0,l.kt)("td",{parentName:"
tr",align:"right"},"8"),(0,l.kt)("td",{parentName:"tr",align:"right"},"80"),(0,l.kt)("td",{parentName:"tr",align:"right"},"173"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"40-50%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"7"),(0,l.kt)("td",{parentName:"tr",align:"right"},"65"),(0,l.kt)("td",{parentName:"tr",align:"right"},"206"),(0,l.kt)("td",{parentName:"tr",align:"right"},"8")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"50-60%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"4"),(0,l.kt)("td",{parentName:"tr",align:"right"},"54"),(0,l.kt)("td",{parentName:"tr",align:"right"},"207"),(0,l.kt)("td",{parentName:"tr",align:"right"},"6")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"60-70%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"46"),(0,l.kt)("td",{parentName:"tr",align:"right"},"154"),(0,l.kt)("td",{parentName:"tr",align:"right"},"18")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"70-80%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"2"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49"),(0,l.kt)("td",{parentName:"tr",align:"right"},"120"),(0,l.kt)("td",{parentName:"tr",align:"right"},"49")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"80-90%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"34"),(0,l.kt)("td",{parentName:"tr",align:"right"},"58"),(0,l.kt)("td",{parentName:"tr",align:"right"},"96")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"right"},"90-100%"),(0,l.kt)("td",{parentName:"tr",align:"right"},"0"),(0,l.kt)("td",{parentName:"tr",align:"right"},"26"),(0,l.kt)("td",{parentName:"tr",align:"right"},"40"),(0,l.kt)("td",{parentName:"tr",align:"right"},"174")))),(0,l.kt)("div",{cla
ssName:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Note")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("ul",{parentName:"div"},(0,l.kt)("li",{parentName:"ul"},"Table source: ",(0,l.kt)("a",{parentName:"li",href:"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf"},"https://www.biorxiv.org/content/biorxiv/early/2019/01/28/531210.full-text.pdf")),(0,l.kt)("li",{parentName:"ul"},"This table indicates that lower LOEUF scores have more deleterious effect on genes."),(0,l.kt)("li",{parentName:"ul"},"Only 15 out of 19685 genes have conflicting entries.")))),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"List of genes with conflicting entries")),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},'MDGA2:\n {"pLI":9.99e-1,"pRec":7.81e-4,"pNull":8.65e-12,"synZ":8.30e-1,"misZ":1.68e0,"loeuf":2.39e-1}\n {"pLI":6.65e-1,"pRec":3.35e-1,"pNull":5.58e-10,"synZ":8.39e-1,"misZ":1.74e0,"loeuf":3.51e-1}\nCRYBG3:\n {"pLI":9.27e-5,"pRec":1.00e0,"pNull":1.88e-7,"synZ":1.82e0,"misZ":4.68e-1,"loeuf":4.93e-1}\n {"pLI":2.69e-4,"pRec":1.00e0,"pNull":1.20e-4,"synZ":2.63e0,"misZ":9.80e-1,"loeuf":5.98e-1}\nCHTF8:\n {"pLI":8.29e-1,"pRec":1.67e-1,"pNull":3.21e-3,"synZ":1.94e0,"misZ":9.48e-1,"loeuf":5.13e-1}\n {"pLI":3.73e-1,"pRec":5.84e-1,"pNull":4.29e-2,"synZ":3.33e-1,"misZ":2.91e-1,"loeuf":9.92e-1}\nSEPT1:\n 
{"pLI":6.77e-8,"pRec":8.90e-1,"pNull":1.10e-1,"synZ":1.58e-1,"misZ":1.57e0,"loeuf":9.68e-1}\n {"pLI":1.96e-8,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":1.68e-1,"misZ":1.41e0,"loeuf":1.08e0}\nARL14EPL:\n {"pLI":3.48e-2,"pRec":8.38e-1,"pNull":1.28e-1,"synZ":3.56e-1,"misZ":-1.87e-1,"loeuf":1.23e0}\n {"pLI":3.23e-2,"pRec":8.29e-1,"pNull":1.38e-1,"synZ":1.15e0,"misZ":-4.05e-1,"loeuf":1.26e0}\nUGT2A1:\n {"pLI":2.90e-13,"pRec":1.40e-1,"pNull":8.60e-1,"synZ":-1.29e0,"misZ":-1.77e0,"loeuf":1.18e0}\n {"pLI":3.88e-17,"pRec":2.87e-3,"pNull":9.97e-1,"synZ":-8.00e-1,"misZ":-1.40e0,"loeuf":1.53e0}\nLTB4R2:\n {"pLI":4.39e-4,"pRec":6.71e-1,"pNull":3.29e-1,"synZ":-5.24e-1,"misZ":-2.96e-1,"loeuf":1.40e0}\n {"pLI":1.38e-5,"pRec":4.12e-1,"pNull":5.88e-1,"synZ":-4.58e-1,"misZ":-2.02e-1,"loeuf":1.54e0}\nCDRT1:\n {"pLI":4.98e-14,"pRec":5.31e-1,"pNull":4.69e-1,"synZ":8.18e-1,"misZ":6.57e-1,"loeuf":1.00e0}\n {"pLI":3.50e-3,"pRec":6.37e-1,"pNull":3.59e-1,"synZ":4.89e-1,"misZ":6.90e-1,"loeuf":1.63e0}\nMUC3A:\n {"pLI":1.48e-10,"pRec":5.76e-1,"pNull":4.24e-1,"synZ":5.81e-2,"misZ":-6.01e-1,"loeuf":1.06e0}\n {"pLI":4.03e-1,"pRec":4.79e-1,"pNull":1.17e-1,"synZ":4.05e-2,"misZ":-1.60e-1,"loeuf":1.70e0}\nCOG8:\n {"pLI":2.97e-9,"pRec":5.04e-1,"pNull":4.96e-1,"synZ":-1.35e0,"misZ":-9.37e-2,"loeuf":1.13e0}\n {"pLI":2.31e-3,"pRec":5.47e-1,"pNull":4.50e-1,"synZ":-4.94e-1,"misZ":-1.48e-1,"loeuf":1.76e0}\nAC006486.1:\n {"pLI":9.37e-1,"pRec":6.27e-2,"pNull":2.47e-4,"synZ":1.44e0,"misZ":2.12e0,"loeuf":3.41e-1}\n {"pLI":1.14e-1,"pRec":6.16e-1,"pNull":2.70e-1,"synZ":-7.57e-2,"misZ":8.33e-2,"loeuf":1.84e0}\nAL645922.1:\n {"pLI":4.67e-16,"pRec":1.00e0,"pNull":4.15e-5,"synZ":7.99e-1,"misZ":1.61e0,"loeuf":6.92e-1}\n {"pLI":1.60e-3,"pRec":2.78e-1,"pNull":7.21e-1,"synZ":-7.30e-2,"misZ":3.21e-1,"loeuf":1.96e0}\nNBPF20:\n {"pLI":1.42e-7,"pRec":3.40e-2,"pNull":9.66e-1,"synZ":-1.86e0,"misZ":-2.88e0,"loeuf":1.97e0}\n {"pLI":1.92e-22,"pRec":7.96e-6,"pNull":1.00e0,"synZ":-9.73e0,"misZ":-7.67e0,"loeuf":1.97e0}\nPRAMEF11:\n 
{"pLI":6.16e-4,"pRec":7.42e-1,"pNull":2.58e-1,"synZ":-4.02e0,"misZ":-3.69e0,"loeuf":1.31e0}\n {"synZ":-3.33e0,"misZ":-2.59e0}\nFAM231D:\n {"synZ":-1.98e0,"misZ":-1.44e0}\n {"synZ":1.07e0,"misZ":3.13e-1}\n')),(0,l.kt)("p",null,(0,l.kt)("strong",{parentName:"p"},"Conflict resolution")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"Pick the entry with the lowest LOEUF score"),(0,l.kt)("li",{parentName:"ul"},"If the same, pick the lowest pLI"),(0,l.kt)("li",{parentName:"ul"},"Otherwise pick the entry with the max absolute value of synZ + misZ")),(0,l.kt)("h3",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz"},"https://storage.googleapis.com/gnomad-public/release/2.1.1/constraint/gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz")),(0,l.kt)("h3",{id:"json-output-1"},"JSON output"),(0,l.kt)(o.default,{mdxType:"JSONG"}))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/c838d36d.7cf9ec25.js b/assets/js/c838d36d.7cf9ec25.js deleted file mode 100644 index 15fe1fba..00000000 --- a/assets/js/c838d36d.7cf9ec25.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7989],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>h});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var 
t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),u=c(n),m=i,h=u["".concat(s,".").concat(m)]||u[m]||p[m]||r;return n?a.createElement(h,o(o({ref:t},d),{},{components:n})):a.createElement(h,o({ref:t},d))}));function h(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:i,o[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={title:"Getting Started"},o=void 0,l={unversionedId:"introduction/getting-started",id:"version-3.17/introduction/getting-started",title:"Getting Started",description:"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.",source:"@site/versioned_docs/version-3.17/introduction/getting-started.md",sourceDirName:"introduction",slug:"/introduction/getting-started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/getting-started",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/introduction/getting-started.md",tags:[],version:"3.17",frontMatter:{title:"Getting Started"},sidebar:"version-3.17/docs",previous:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/dependencies"},next:{title:"Parsing Nirvana JSON",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/parsing-json"}},s=[{value:"Quick Start",id:"quick-start",children:[],level:2},{value:"Getting Nirvana",id:"getting-nirvana",children:[{value:"Compile from Source",id:"compile-from-source",children:[],level:3},{value:"GitHub Release Notes",id:"github-release-notes",children:[],level:3},{value:"Docker",id:"docker",children:[],level:3}],level:2},{value:"Downloading the data files",id:"downloading-the-data-files",children:[],level:2},{value:"Download a test VCF file",id:"download-a-test-vcf-file",children:[],level:2},{value:"Running Nirvana",id:"running-nirvana",children:[],level:2}],c={toc:s},d="wrapper";function u(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"Nirvana is written in C# using ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core")," (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files."),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana currently uses .NET Core 3.1 or later. 
Please make sure that you have the most current runtime from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core downloads")," page."))),(0,i.kt)("h2",{id:"quick-start"},"Quick Start"),(0,i.kt)("p",null,"If you want to get started right away, we've created ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh"},"a script")," that downloads Nirvana, compiles it, and starts annotating a test file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh\nbash ./TestNirvana.sh\n")),(0,i.kt)("p",null,"We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X."),(0,i.kt)("h2",{id:"getting-nirvana"},"Getting Nirvana"),(0,i.kt)("h3",{id:"compile-from-source"},"Compile from Source"),(0,i.kt)("p",null,"The following will grab the latest version of Nirvana from GitHub and compile it using the .NET Core compiler:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"git clone https://github.com/Illumina/Nirvana.git\ncd Nirvana\ndotnet build -c Release\n")),(0,i.kt)("h3",{id:"github-release-notes"},"GitHub Release Notes"),(0,i.kt)("p",null,"Alternatively, you can grab the latest binaries from our ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/releases"},"GitHub Releases")," page:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p Nirvana/Data\ncd Nirvana\nunzip Nirvana-3.16.1-dotnet-3.1.0.zip\n")),(0,i.kt)("h3",{id:"docker"},"Docker"),(0,i.kt)("p",null,"You can find us on ",(0,i.kt)("a",{parentName:"p",href:"https://hub.docker.com/repository/docker/annotation/nirvana"},"Docker Hub")," under ",(0,i.kt)("inlineCode",{parentName:"p"},"annotation/nirvana"),":"),(0,i.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"We think Docker is fantastic. However, because our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Nirvana in Docker."))),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p Nirvana/Data\ncd Nirvana\ndocker pull annotation/nirvana:3.14\n")),(0,i.kt)("p",null,"For Docker, we have special instructions for running the Downloader:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \\\n /opt/nirvana/Downloader.dll --ga GRCh37 -o /scratch\n")),(0,i.kt)("p",null,"Similarly, we have special instructions for running Nirvana (Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF")," in case you need it):"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \\\n /opt/nirvana/Nirvana.dll -c /scratch/Cache/GRCh37/Both \\\n -r /scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n --sd /scratch/SupplementaryAnnotation/GRCh37 \\\n -i /scratch/HiSeq.10000.vcf.gz -o 
/scratch/HiSeq\n")),(0,i.kt)("h2",{id:"downloading-the-data-files"},"Downloading the data files"),(0,i.kt)("p",null,"To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp3.1/Downloader.dll \\\n --ga GRCh37 \\\n -o Data\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--ga")," argument specifies the genome assembly which can be ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh37"),", ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh38"),", or ",(0,i.kt)("inlineCode",{parentName:"li"},"both"),"."),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Glitches in the Matrix")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. 
If you see that a file was marked ",(0,i.kt)("inlineCode",{parentName:"p"},"truncated"),", try fixing the root cause and running the downloader again."))),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"From time to time, you can re-run the Downloader to get the latest annotation files. 
It will only download the files that changed."))),(0,i.kt)("h2",{id:"download-a-test-vcf-file"},"Download a test VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz\n")),(0,i.kt)("h2",{id:"running-nirvana"},"Running Nirvana"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp3.1/Nirvana.dll \\\n -c Data/Cache/GRCh37/Both \\\n --sd Data/SupplementaryAnnotation/GRCh37 \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i HiSeq.10000.vcf.gz \\\n -o HiSeq.10000\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache prefix"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nNirvana (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, 
Jiang, Li, and Kang 3.16.1\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.2\nSA Position Scan 00:00:00.1 55,270\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr1 00:00:00.1 00:00:01.5 6,323\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:01.3 23.9 %\nPreload 00:00:00.1 2.9 %\nAnnotation 00:00:01.5 27.2 %\n\nPeak memory usage: 1.434 GB\nTime: 00:00:05.2\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"HiSeq.10000.json.gz"),". Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.json.gz"},"the full JSON file"),"."))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/c870c102.a173782f.js b/assets/js/c870c102.a173782f.js deleted file mode 100644 index 0c5d8cb9..00000000 --- a/assets/js/c870c102.a173782f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1790],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>d});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var i=r.createContext({}),m=function(t){var e=r.useContext(i),n=e;return 
t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=m(t.components);return r.createElement(i.Provider,{value:e},t.children)},s="mdxType",f={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,l=t.originalType,i=t.parentName,c=p(t,["components","mdxType","originalType","parentName"]),s=m(n),u=a,d=s["".concat(i,".").concat(u)]||s[u]||f[u]||l;return n?r.createElement(d,o(o({ref:e},c),{},{components:n})):r.createElement(d,o({ref:e},c))}));function d(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=u;var p={};for(var i in e)hasOwnProperty.call(e,i)&&(p[i]=e[i]);p.originalType=t,p[s]="string"==typeof t?t:a,o[1]=p;for(var m=2;m{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>p,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,p={unversionedId:"data-sources/1000Genomes-snv-json",id:"version-3.2.5/data-sources/1000Genomes-snv-json",title:"1000Genomes-snv-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/1000Genomes-snv-json.md",sourceDirName:"data-sources",slug:"/data-sources/1000Genomes-snv-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/1000Genomes-snv-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/1000Genomes-snv-json.md",tags:[],version:"3.2.5",frontMatter:{}},i=[],m={toc:i},c="wrapper";function s(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"oneKg":{\n "allAf":0.200879,\n "afrAf":0.210287,\n "amrAf":0.139769,\n "easAf":0.275794,\n "eurAf":0.181909,\n "sasAf":0.173824,\n "allAn":5008,\n "afrAn":1322,\n "amrAn":694,\n "easAn":1008,\n "eurAn":1006,\n 
"sasAn":978,\n "allAc":1006,\n "afrAc":278,\n "amrAc":97,\n "easAc":278,\n "eurAc":183,\n "sasAc":170\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for all populations. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for all populations. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"allAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for all populations. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the African super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the African super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"afrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the African super population. 
Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the Ad Mixed American super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"amrAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the Ad Mixed American super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the East Asian super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"easAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the East Asian super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the European super population. 
Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the European super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"eurAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the European super population. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAf"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele frequency for the South Asian super population. Range: 0 - 1.0")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele count for the South Asian super population. Integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sasAn"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"allele number for the South Asian super population. 
Non-zero integer.")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/ca42a9aa.47bf73d2.js b/assets/js/ca42a9aa.47bf73d2.js deleted file mode 100644 index 58a665ed..00000000 --- a/assets/js/ca42a9aa.47bf73d2.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8459],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function a(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var i=r.createContext({}),p=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},s=function(e){var t=p(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,o=e.mdxType,a=e.originalType,i=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=o,f=u["".concat(i,".").concat(d)]||u[d]||m[d]||a;return n?r.createElement(f,l(l({ref:t},s),{},{components:n})):r.createElement(f,l({ref:t},s))}));function f(e,t){var n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var a=n.length,l=new Array(a);l[0]=d;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:o,l[1]=c;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>a,metadata:()=>c,toc:()=>i});var r=n(87462),o=(n(67294),n(3905));const 
a={},l=void 0,c={unversionedId:"data-sources/phylop-json",id:"version-3.17/data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/phylop-json.md",tags:[],version:"3.17",frontMatter:{}},i=[],p={toc:i},s="wrapper";function u(e){let{components:t,...n}=e;return(0,o.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] \n')),(0,o.kt)("table",null,(0,o.kt)("thead",{parentName:"table"},(0,o.kt)("tr",{parentName:"thead"},(0,o.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,o.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,o.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,o.kt)("tbody",{parentName:"table"},(0,o.kt)("tr",{parentName:"tbody"},(0,o.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,o.kt)("td",{parentName:"tr",align:"center"},"float"),(0,o.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/cbf25a1c.8a36fb0b.js b/assets/js/cbf25a1c.8a36fb0b.js deleted file mode 100644 index 427c2518..00000000 --- a/assets/js/cbf25a1c.8a36fb0b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3460],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>f});var a=n(67294);function r(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),p=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=p(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,s=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),c=p(n),u=r,f=c["".concat(s,".").concat(u)]||c[u]||d[u]||i;return n?a.createElement(f,o(o({ref:t},m),{},{components:n})):a.createElement(f,o({ref:t},m))}));function f(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[c]="string"==typeof e?e:r,o[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>c,frontMatter:()=>i,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,l={unversionedId:"data-sources/omim-json",id:"version-3.17/data-sources/omim-json",title:"omim-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.17/data-sources/omim-json.md",sourceDirName:"data-sources",slug:"/data-sources/omim-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/omim-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/omim-json.md",tags:[],version:"3.17",frontMatter:{}},s=[{value:"Phenotype",id:"phenotype",children:[],level:4},{value:"Mapping",id:"mapping",children:[],level:4},{value:"Inheritance",id:"inheritance",children:[],level:4},{value:"Comments",id:"comments",children:[],level:4}],p={toc:s},m="wrapper";function c(e){let{components:t,...n}=e;return(0,r.kt)(m,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"omim":[ \n { \n "mimNumber":600678,\n "geneName":"MutS, E. coli, homolog of, 6",\n "description":"The transcription factor p53 responds to diverse cellular stresses to regulate target genes that induce cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. In addition, p53 appears to induce apoptosis through nontranscriptional cytoplasmic processes. In unstressed cells, p53 is kept inactive essentially through the actions of the ubiquitin ligase MDM2, which inhibits p53 transcriptional activity and ubiquitinates p53 to promote its degradation. Numerous posttranslational modifications modulate p53 activity, most notably phosphorylation and acetylation. Several less abundant p53 isoforms also modulate p53 activity. 
Activity of p53 is ubiquitously lost in human cancer either by mutation of the p53 gene itself or by loss of cell signaling upstream or downstream of p53 (Toledo and Wahl, 2006; Bourdon, 2007; Vousden and Lane, 2007)",\n "phenotypes":[ \n { \n "mimNumber":614350,\n "phenotype":"Colorectal cancer, hereditary nonpolyposis, type 5",\n "description":"Hereditary nonpolyposis colorectal cancer type 5 is a cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal dominant"\n ]\n },\n { \n "mimNumber":608089,\n "phenotype":"Endometrial cancer, familial",\n "mapping":"molecular basis of the disorder is known"\n },\n { \n "mimNumber":276300,\n "phenotype":"Mismatch repair cancer syndrome",\n "description":"Constitutional mismatch repair deficiency is a rare childhood cancer predisposition syndrome ...",\n "mapping":"molecular basis of the disorder is known",\n "inheritances":[ \n "Autosomal recessive"\n ],\n "comments" : [\n "contribute to susceptibility to multifactorial disorders or to susceptibility to infection",\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM ID for gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"geneName"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene 
name")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"left"},"object array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#phenotype"},"Phenotype entry below"))))),(0,r.kt)("h4",{id:"phenotype"},"Phenotype"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mimNumber"),(0,r.kt)("td",{parentName:"tr",align:"left"},"int"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"description"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"mapping"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#mapping"},"possible values below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"inheritance"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#inheritance"},"possible values 
below"))),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"comments"),(0,r.kt)("td",{parentName:"tr",align:"left"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see ",(0,r.kt)("a",{parentName:"td",href:"#comments"},"possible values below"))))),(0,r.kt)("h4",{id:"mapping"},"Mapping"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"disorder was positioned by mapping of the wild type gene"),(0,r.kt)("li",{parentName:"ol"},"disease phenotype itself was mapped"),(0,r.kt)("li",{parentName:"ol"},"molecular basis of the disorder is known"),(0,r.kt)("li",{parentName:"ol"},"disorder is a chromosome deletion or duplication syndrome")),(0,r.kt)("h4",{id:"inheritance"},"Inheritance"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"autosomal recessive"),(0,r.kt)("li",{parentName:"ul"},"autosomal dominant")),(0,r.kt)("h4",{id:"comments"},"Comments"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"contributes to the susceptibility to multifactorial disorders"),(0,r.kt)("li",{parentName:"ul"},"variations that lead to apparently abnormal laboratory test values"),(0,r.kt)("li",{parentName:"ul"},"unconfirmed mapping")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/cc05e3ba.e4571c30.js b/assets/js/cc05e3ba.e4571c30.js deleted file mode 100644 index 07884d7e..00000000 --- a/assets/js/cc05e3ba.e4571c30.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3288,3514,8841,2218],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>g});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return 
i}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},d=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,l=e.originalType,s=e.parentName,d=o(e,["components","mdxType","originalType","parentName"]),p=c(n),m=i,g=p["".concat(s,".").concat(m)]||p[m]||u[m]||l;return n?a.createElement(g,r(r({ref:t},d),{},{components:n})):a.createElement(g,r({ref:t},d))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var l=n.length,r=new Array(l);r[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[p]="string"==typeof e?e:i,r[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>p,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.17/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clingen-dosage-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],c={toc:s},d="wrapper";function 
p(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}p.isMDXComponent=!0},1890:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>p,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.17/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],c={toc:s},d="wrapper";function p(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n 
"classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,i.kt)("td",{parentName:"tr",align:null},"object"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"disease"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"disease label")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classification"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"classification")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no reported 
evidence"),(0,i.kt)("li",{parentName:"ul"},"disputed"),(0,i.kt)("li",{parentName:"ul"},"limited"),(0,i.kt)("li",{parentName:"ul"},"moderate"),(0,i.kt)("li",{parentName:"ul"},"definitive"),(0,i.kt)("li",{parentName:"ul"},"strong"),(0,i.kt)("li",{parentName:"ul"},"refuted")))}p.isMDXComponent=!0},35295:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>p,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-json",id:"version-3.17/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clingen-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],c={toc:s},d="wrapper";function p(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n "reciprocalOverlap":0.00254\n 
}\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingen"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"variantType"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"id"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. 
Alternatively a VID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"validated"),(0,i.kt)("td",{parentName:"tr",align:null},"boolean"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")))}p.isMDXComponent=!0},44245:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>c,default:()=>g,frontMatter:()=>s,metadata:()=>d,toc:()=>p});var a=n(87462),i=(n(67294),n(3905)),l=n(35295),r=n(81474),o=n(1890);const s={title:"ClinGen"},c=void 0,d={unversionedId:"data-sources/clingen",id:"version-3.17/data-sources/clingen",title:"ClinGen",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/clingen.mdx",sourceDirName:"data-sources",slug:"/data-sources/clingen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clingen.mdx",tags:[],version:"3.17",frontMatter:{title:"ClinGen"},sidebar:"version-3.17/docs",previous:{title:"Amino Acid Conservation",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/amino-acid-conservation"},next:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clinvar"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"ISCA Regions",id:"isca-regions",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Status levels",id:"status-levels",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"Conflict 
Resolution",id:"conflict-resolution",children:[{value:"Clinical significance priority",id:"clinical-significance-priority",children:[],level:3},{value:"Validation Priority",id:"validation-priority",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2},{value:"Dosage Sensitivity Map",id:"dosage-sensitivity-map",children:[{value:"TSV Source files",id:"tsv-source-files",children:[],level:3},{value:"Dosage Rating System",id:"dosage-rating-system",children:[],level:3},{value:"Download URL",id:"download-url-1",children:[],level:3},{value:"JSON Output",id:"json-output-1",children:[],level:3}],level:2},{value:"Gene-Disease Validity",id:"gene-disease-validity",children:[{value:"Source TSV",id:"source-tsv",children:[],level:3},{value:"Download URL",id:"download-url-2",children:[],level:3},{value:"Conflict Resolution",id:"conflict-resolution-1",children:[{value:"Multiple Classifications",id:"multiple-classifications",children:[],level:4},{value:"Multiple Dates",id:"multiple-dates",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output-2",children:[],level:3}],level:2}],u={toc:p},m="wrapper";function g(e){let{components:t,...n}=e;return(0,i.kt)(m,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinGen is a National Institutes of Health (NIH)-funded resource dedicated to building a central resource that defines the clinical relevance of genes and variants for use in precision medicine and research."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 
0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Heidi L. Rehm, Ph.D., Jonathan S. Berg, M.D., Ph.D., Lisa D. Brooks, Ph.D., Carlos D. Bustamante, Ph.D., James P. Evans, M.D., Ph.D., Melissa J. Landrum, Ph.D., David H. Ledbetter, Ph.D., Donna R. Maglott, Ph.D., Christa Lese Martin, Ph.D., Robert L. Nussbaum, M.D., Sharon E. Plon, M.D., Ph.D., Erin M. Ramos, Ph.D., Stephen T. Sherry, Ph.D., and Michael S. Watson, Ph.D., for ClinGen. ",(0,i.kt)("strong",{parentName:"p"},"ClinGen The Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"N Engl J Med 2015; 372:2235-2242 June 4, 2015 DOI: 10.1056/NEJMsr1406261.")))),(0,i.kt)("h2",{id:"isca-regions"},"ISCA Regions"),(0,i.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,i.kt)("p",null,"ClinGen contains only copy number variation variants, since the coordinates in ClinGen original file follow the same rule as BED format, the coordinates had to be adjusted to ","[BEGIN+1, END]","."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#bin chrom chromStart chromEnd name score strand thickStart thickEnd attrCount attrTags attrVals\nnsv530705 1 564405 8597804 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530706 1 564424 3262790 0 1 copy_number_loss pathogenic False Abnormal facial shape,Abnormality of cardiac morphology,Global developmental delay,Muscular hypotonia HP:0001252,HP:0001263,HP:0001627,HP:0001999,MedGen:CN001147,MedGen:CN001157,MedGen:CN001482,MedGen:CN001810\nnsv530707 1 564424 7068738 0 1 copy_number_loss pathogenic False Abnormality of cardiac morphology,Cleft upper lip,Failure to thrive,Global developmental delay,Intrauterine growth 
retardation,Microcephaly,Short stature HP:0000204,HP:0000252,HP:0001263,HP:0001508,HP:0001511,HP:0001627,HP:0004322,MedGen:C0349588,MedGen:C1845868,MedGen:C1853481,MedGen:C2364119,MedGen:CN000197,MedGen:CN001157,MedGen:CN001482\nnsv533512 1 564435 649748 0 1 copy_number_loss benign False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv931338 1 714078 4958499 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530300 1 728138 5066371 1 0 copy_number_gain pathogenic False Abnormality of cardiac morphology,Cleft palate,Global developmental delay HP:0000175,HP:0001263,HP:0001627,MedGen:C2240378,MedGen:CN001157,MedGen:CN001482\n")),(0,i.kt)("h4",{id:"status-levels"},"Status levels"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"We parse the ClinGen tsv file and extract the following:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"chrom"),(0,i.kt)("li",{parentName:"ul"},"chromStart (note this a 0-based coordinate)"),(0,i.kt)("li",{parentName:"ul"},"chromEnd"),(0,i.kt)("li",{parentName:"ul"},"attrTags"),(0,i.kt)("li",{parentName:"ul"},"attrVals")),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," are comma separated lists. ",(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," contains the field keys and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," contains the field values. 
We will parse the following keys from the two fields:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"parent (this will be used as the ID in our JSON output)"),(0,i.kt)("li",{parentName:"ul"},"clinical_int"),(0,i.kt)("li",{parentName:"ul"},"validated"),(0,i.kt)("li",{parentName:"ul"},"phenotype (this should be a string array)"),(0,i.kt)("li",{parentName:"ul"},"phenotype_id (this should be a string array)")),(0,i.kt)("p",null,"Observed losses and observed gains will be calculated from entries that share a common parent ID."),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"variants with a common parent ID and same coordinates are grouped",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"calculated observed losses, observed gains for each group"),(0,i.kt)("li",{parentName:"ul"},"Clinical significance and validation status are collapsed using the priority strategy described below"))),(0,i.kt)("li",{parentName:"ul"},"Variants with the same parent ID can have different coordinates (mapped to hg38)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"nsv491508 : chr14:105583663-106881350 and chr14:105605043-106766076 (only one example)"),(0,i.kt)("li",{parentName:"ul"},"we kept both variants")))),(0,i.kt)("h2",{id:"conflict-resolution"},"Conflict Resolution"),(0,i.kt)("h3",{id:"clinical-significance-priority"},"Clinical significance priority"),(0,i.kt)("p",null,"When there are a mixture of variants belonging to the same parent ID, we will choose the most pathogenic clinical significance from the available values. i.e. 
if 3 samples were deemed pathogenic and 2 samples were likely pathogenic, we would list the variant as pathogenic."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Priority")," (high to low)"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Priority"),(0,i.kt)("li",{parentName:"ul"},"Pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Benign"),(0,i.kt)("li",{parentName:"ul"},"Likely benign"),(0,i.kt)("li",{parentName:"ul"},"Uncertain significance")),(0,i.kt)("h3",{id:"validation-priority"},"Validation Priority"),(0,i.kt)("p",null,"When there are a mixture of variants belonging to same parent ID, we will set the validation status to true if any of the variants were validated."),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite"},"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite")),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(l.default,{mdxType:"CLINGENJSON"}),(0,i.kt)("h2",{id:"dosage-sensitivity-map"},"Dosage Sensitivity Map"),(0,i.kt)("p",null,"The Clinical Genome Resource (ClinGen) consortium is curating genes and regions of the genome to assess whether there is evidence to support that these genes/regions are dosage sensitive and should be targeted on a cytogenomic array. 
Nirvana reports these annotations for overlapping SVs."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Riggs ER, Nelson T, Merz A, Ackley T, Bunke B, Collins CD, Collinson MN, Fan YS, Goodenberger ML, Golden DM, Haglund-Hazy L, Krgovic D, Lamb AN, Lewis Z, Li G, Liu Y, Meck J, Neufeld-Kaiser W, Runke CK, Sanmann JN, Stavropoulos DJ, Strong E, Su M, Tayeh MK, Kokalj Vokac N, Thorland EC, Andersen E, Martin CL. ",(0,i.kt)("strong",{parentName:"p"},"Copy number variant discrepancy resolution using the ClinGen dosage sensitivity map results in updated clinical interpretations in ClinVar.")," ",(0,i.kt)("em",{parentName:"p"},"Hum Mutat. 2018 Nov;39(11):1650-1659. doi: 10.1002/humu.23610. 
PMID: 30095202; PMCID: PMC7374944.")))),(0,i.kt)("h3",{id:"tsv-source-files"},"TSV Source files"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Regions")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Region Curation Results\n#07 May,2019\n#Genomic Locations are reported on GRCh38 (hg38): GCF_000001405.36\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_region.cgi?id=key\n#ISCA ID ISCA Region Name cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nISCA-46299 Xp11.22 region (includes HUWE1) Xp11.22 tbd 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 22840365 20655035 26692240 2018-11-19\nISCA-46295 15q13.3 recurrent region (D-CHRNA7 to BP5) (includes CHRNA7 and OTUD7A) 15q13.3 chr15:31727418-32153204 3 Sufficient evidence for dosage pathogenicity 19898479 20236110 22775350 40 Dosage sensitivity unlikely 26968334 22420048 2018-05-10\nISCA-46291 7q11.23 recurrent distal region (includes HIP1, YWHAG) 7q11.23 chr7:75528718-76433859 2 Some evidence for dosage pathogenicity 21109226 16971481 1 Little evidence for dosage pathogenicity 21109226 27867344 2018-12-31\nISCA-46290 Xp11.22p11.23 recurrent region (includes SHROOM4) Xp11.22-p11.23 chrX: 48447780-52444264 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 19716111 21418194 25425167 2017-12-14 300801\n")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Genes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Gene Curation Results\n#24 May,2019\n#Genomic Locations are reported on GRCh37 
(hg19): GCF_000001405.13\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_gene.cgi?sym=Gene Symbol\n#Gene Symbol Gene ID cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nA4GALT 53947 22q13.2 chr22:43088121-43117307 30 Gene associated with autosomal recessive phenotype 0 No evidence available 2014-12-11 111400\nAAGAB 79719 15q23 chr15:67493013-67547536 3 Sufficient evidence for dosage pathogenicity 23064416 23000146 0 No evidence available 2013-02-28 148600\n")),(0,i.kt)("h3",{id:"dosage-rating-system"},"Dosage Rating System"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Rating"),(0,i.kt)("th",{parentName:"tr",align:null},"Possible Clinical Interpretation"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"0"),(0,i.kt)("td",{parentName:"tr",align:null},"No evidence to suggest that dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"1"),(0,i.kt)("td",{parentName:"tr",align:null},"Little evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"2"),(0,i.kt)("td",{parentName:"tr",align:null},"Emerging evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"3"),(0,i.kt)("td",{parentName:"tr",align:null},"Sufficient evidence suggesting 
dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"30"),(0,i.kt)("td",{parentName:"tr",align:null},"Gene associated with autosomal recessive phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"40"),(0,i.kt)("td",{parentName:"tr",align:null},"Dosage sensitivity unlikely")))),(0,i.kt)("p",null,"Reference: ",(0,i.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml"},"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml")),(0,i.kt)("h3",{id:"download-url-1"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.clinicalgenome.org/"},"ftp://ftp.clinicalgenome.org/")),(0,i.kt)("h3",{id:"json-output-1"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"ClinGenDosageJson"}),(0,i.kt)("h2",{id:"gene-disease-validity"},"Gene-Disease Validity"),(0,i.kt)("p",null,"The ClinGen Gene-Disease Clinical Validity curation process involves evaluating the strength of evidence supporting or refuting a claim that variation in a particular gene causes a particular disease. Nirvana reports these annotations for genes in the genes section of the JSON."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Strande NT, Riggs ER, Buchanan AH, et al. 
",(0,i.kt)("strong",{parentName:"p"},"Evaluating the Clinical Validity of Gene-Disease Associations: An Evidence-Based Framework Developed by the Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"Am J Hum Genet. 2017;100(6):895-906. doi:10.1016/j.ajhg.2017.04.015")))),(0,i.kt)("h3",{id:"source-tsv"},"Source TSV"),(0,i.kt)("p",null,"The source data comes in a CSV file that we convert to a TSV as follows:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"CLINGEN GENE VALIDITY CURATIONS\nFILE CREATED: 2019-05-28\nWEBPAGE: https://search.clinicalgenome.org/kb/gene-validity\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nGENE SYMBOL,GENE ID (HGNC),DISEASE LABEL,DISEASE ID (MONDO),SOP,CLASSIFICATION,ONLINE REPORT,CLASSIFICATION DATE\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nA2ML1,HGNC:23336,Noonan syndrome with multiple lentigines,MONDO_0007893,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/59b87033-dd91-4f1e-aec1-c9b1f5124b16--2018-06-07T14:37:47,2018-06-07T14:37:47.175Z\nA2ML1,HGNC:23336,cardiofaciocutaneous syndrome,MONDO_0015280,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/fc3c41d8-8497-489b-a350-c9e30016bc6a--2018-06-07T14:31:03,2018-06-07T14:31:03.696Z\nA2ML1,HGNC:23336,Costello syndrome,MONDO_0009026,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/ea72ba8d-cf62-44bc-86be-da64e3848eba--2018-06-07T14:34:05,2018-06-07T14:34:05.324Z\n")),(0,i.kt)("h3",{id:"download-url-2"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://search.clinicalgenome.org/kb/gene-validity.csv"},"https://search.clinicalgenome.org/kb/gene-validity.csv")),(0,i.kt)("h3",{id:"conflict-resolution-1"},"Conflict Resolution"),(0,i.kt)("h4",{id:"multiple-classifications"},"Multiple 
Classifications"),(0,i.kt)("p",null,"Here is an example of multiple classifications."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0010192 ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep EDNRB\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Moderate,https://search.clinicalgenome.org/kb/gene-validity/d7abbd45-7915-437b-849b-dea876bfc2f5--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Limited,https://search.clinicalgenome.org/kb/gene-validity/73ee9727-60c1-40fd-830f-08c2b513d2ee--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\n")),(0,i.kt)("p",null,"In such cases, we select the more severe classification."),(0,i.kt)("h4",{id:"multiple-dates"},"Multiple Dates"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0016419 ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep MUTYH\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9904,2017-05-24T00:00:00\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9902,2017-05-25T00:00:00\n")),(0,i.kt)("p",null,"If the classifications are the same, we should select the latest classification date."),(0,i.kt)("h3",{id:"json-output-2"},"JSON Output"),(0,i.kt)(o.default,{mdxType:"ClinGenGeneValidity"}))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/cd0802b4.10f4a81e.js b/assets/js/cd0802b4.10f4a81e.js new file mode 100644 index 00000000..71afb464 --- /dev/null +++ b/assets/js/cd0802b4.10f4a81e.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1144,1266],{3905:(t,e,a)=>{a.d(e,{Zo:()=>d,kt:()=>g});var n=a(7294);function r(t,e,a){return e in 
t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),m=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},d=function(t){var e=m(t.components);return n.createElement(p.Provider,{value:e},t.children)},c="mdxType",s={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,d=o(t,["components","mdxType","originalType","parentName"]),c=m(a),N=r,g=c["".concat(p,".").concat(N)]||c[N]||s[N]||l;return a?n.createElement(g,i(i({ref:e},d),{},{components:a})):n.createElement(g,i({ref:e},d))}));function g(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=N;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[c]="string"==typeof t?t:r,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/fusioncatcher-json",id:"data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/fusioncatcher-json.md",tags:[],version:"current",frontMatter:{}},p=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],m={toc:p},d="wrapper";function c(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA oesophageal carcinomas"\n ]\n }\n ]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known germline data sources")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data sources")))),(0,r.kt)("h4",{id:"genes"},"genes"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"first"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"second"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are paralogs for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a readthrough event (both are on the same strand and there are no genes between 
them)")))),(0,r.kt)("h4",{id:"gene"},"gene"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}c.isMDXComponent=!0},3468:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>i,metadata:()=>p,toc:()=>m});var n=a(7462),r=(a(7294),a(3905)),l=a(8202);const i={title:"FusionCatcher"},o=void 0,p={unversionedId:"data-sources/fusioncatcher",id:"data-sources/fusioncatcher",title:"FusionCatcher",description:"Overview",source:"@site/docs/data-sources/fusioncatcher.mdx",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/fusioncatcher.mdx",tags:[],version:"current",frontMatter:{title:"FusionCatcher"},sidebar:"docs",previous:{title:"DECIPHER",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher"},next:{title:"GERP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Supported Data 
Sources",id:"supported-data-sources",children:[{value:"Oncogenes",id:"oncogenes",children:[],level:3},{value:"Germline",id:"germline",children:[],level:3},{value:"Somatic",id:"somatic",children:[],level:3}],level:2},{value:"Gene Pair TSV File",id:"gene-pair-tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Gene TSV File",id:"gene-tsv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],d={toc:m},c="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(c,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://github.com/ndaniel/fusioncatcher"},"FusionCatcher")," is a well-known tool that searches for somatic novel/known fusion genes, translocations, and/or chimeras in RNA-seq data. 
While FusionCatcher itself is not part of Illumina Connected Annotations, we have included a subset of their genomic databases in Illumina Connected Annotations."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Daniel Nicorici, Mihaela \u015eatalan, Henrik Edgren, Sara Kangaspeska, Astrid Murum\xe4gi, Olli Kallioniemi, Sami Virtanen, Olavi Kilkku. (2014) ",(0,r.kt)("a",{parentName:"p",href:"https://www.biorxiv.org/content/10.1101/011650v1"},"FusionCatcher \u2013 a tool for finding somatic fusion genes in paired-end RNA-sequencing data"),". 
",(0,r.kt)("em",{parentName:"p"},"bioRxiv")," 011650"))),(0,r.kt)("h2",{id:"supported-data-sources"},"Supported Data Sources"),(0,r.kt)("h3",{id:"oncogenes"},"Oncogenes"),(0,r.kt)("p",null,"The following data sources are aggregated and used to populate the ",(0,r.kt)("inlineCode",{parentName:"p"},"isOncogene")," field in the gene JSON object:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bushman"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.bushmanlab.org/links/genelists"},"bushmanlab.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cancer_genes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ONGENE"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S1673852716302053"},"JGG")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://ongene.bioinfo-minzhao.org"},"bioinfo-minzhao.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"oncogenes_more.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"UniProt tumor 
genes"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/49/D1/D480/6006196"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.uniprot.org/downloads"},"uniprot.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tumor_genes.txt")))),(0,r.kt)("h3",{id:"germline"},"Germline"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Illumina Connected Annotations label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"1000 Genomes Project"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0104567"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"1000genomes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy (strong support)"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"banned.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Illumina Body Map 
2.0"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-513"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"bodymap2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CACG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S0888754312000821"},"Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"cacg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ConjoinG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0013284"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"conjoing.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy prefrontal cortex"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcmedgenomics.biomedcentral.com/articles/10.1186/s12920-016-0164-y"},"BMC Medical Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE68719"},"NCBI GEO")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cortex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Duplicated Genes Database"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0050653"},"PLOS 
ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://dgd.genouest.org/"},"genouest.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"dgd.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"GTEx healthy tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://gtexportal.org/home/"},"gtexportal.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"gtex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"healthy.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Human Protein Atlas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.mcponline.org/article/S1535-9476(20)34633-8/fulltext"},"MCP")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-1733/"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"hpa.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Babiceanu non-cancer tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-cancer_tissues.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor cell 
lines"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor_cells.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions normal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-normal.txt")))),(0,r.kt)("h3",{id:"somatic"},"Somatic"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Illumina Connected Annotations label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Alaei-Mahabadi 18 cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.pnas.org/content/113/48/13768.long"},"PNAS")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"18cancers.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"DepMap CCLE"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://depmap.org/portal/download/"},"depmap.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE 
Klijn"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080"},"Nature Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080#Sec27"},"Nature Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Cancer Genome Project"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cgp.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerKB 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4kb.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerPub 
4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4pub.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerSeq 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4seq.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"COSMIC"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cosmic.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bao gliomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genome.cshlp.org/content/24/11/1765"},"Genome Research")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"gliomas.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Known"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"known.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Mitelman 
DB"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://mitelmandatabase.isb-cgc.org"},"ISB-CGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://storage.cloud.google.com/mitelman-data-files/prod/mitelman_db.zip"},"Google Cloud")),(0,r.kt)("td",{parentName:"tr",align:"left"},"mitelman.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA oesophageal carcinomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature20805"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"oesophagus.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bailey pancreatic cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965#Sec44"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pancreases.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"PCAWG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2018.03.042"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://dcc.icgc.org/releases/PCAWG/transcriptome/fusion"},"ICGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pcawg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Robinson prostate 
cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2015.05.001"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell/fulltext/S0092-8674(15)00548-6?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0092867415005486%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"prostate_cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cancer.gov/about-nci/organization/ccg/research/structural-genomics/tcga"},"cancer.gov")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions tumor"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA Gao"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.celrep.2018.03.050"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell-reports/fulltext/S2211-1247(18)30395-4?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS2211124718303954%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA 
Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TICdb"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-8-33"},"BMC Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genetica.unav.edu/TICdb/allseqs_TICdb.txt"},"unav.edu")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ticdb.txt")))),(0,r.kt)("h2",{id:"gene-pair-tsv-file"},"Gene Pair TSV File"),(0,r.kt)("p",null,"Most of the data files in FusionCatcher are two-column TSV files containing the Ensembl gene IDs that are paired together."),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the 1000genomes.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000006210 ENSG00000102962\nENSG00000006652 ENSG00000181016\nENSG00000014138 ENSG00000149798\nENSG00000026297 ENSG00000071242\nENSG00000035499 ENSG00000155959\nENSG00000055211 ENSG00000131013\nENSG00000055332 ENSG00000179915\nENSG00000062485 ENSG00000257727\nENSG00000065978 ENSG00000166501\nENSG00000066044 ENSG00000104980\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"In Illumina Connected Annotations, we will only import a gene pair if both Ensembl gene IDs are recognized from either our GRCh37 or GRCh38 cache files."),(0,r.kt)("h2",{id:"gene-tsv-file"},"Gene TSV File"),(0,r.kt)("p",null,"Some of the data files are single-column files containing Ensembl gene IDs. 
This is commonly used in the data files representing oncogene data sources."),(0,r.kt)("h3",{id:"example-1"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the oncogenes_more.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000000938\nENSG00000003402\nENSG00000005469\nENSG00000005884\nENSG00000006128\nENSG00000006453\nENSG00000006468\nENSG00000007350\nENSG00000008294\nENSG00000008952\n")),(0,r.kt)("h3",{id:"parsing-1"},"Parsing"),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"FusionCatcher also uses creates custom Ensembl genes (e.g. ",(0,r.kt)("inlineCode",{parentName:"p"},"ENSG09000000002"),") to handle missing Ensembl genes. 
Illumina Connected Annotations will ignore these entries since we only include the gene IDs that are currently recognized by Illumina Connected Annotations."),(0,r.kt)("p",{parentName:"div"},"I suspect that these were originally RefSeq genes and if so, we can support those directly in Illumina Connected Annotations in the future."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sourceforge.net/projects/fusioncatcher/files/data"},"https://sourceforge.net/projects/fusioncatcher/files/data")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSON"}))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/cd0802b4.74040618.js b/assets/js/cd0802b4.74040618.js deleted file mode 100644 index 02d2d04c..00000000 --- a/assets/js/cd0802b4.74040618.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1144,1266],{3905:(t,e,a)=>{a.d(e,{Zo:()=>d,kt:()=>g});var n=a(67294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function i(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),m=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):i(i({},e),t)),a},d=function(t){var e=m(t.components);return n.createElement(p.Provider,{value:e},t.children)},c="mdxType",s={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},N=n.forwardRef((function(t,e){var 
a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,d=o(t,["components","mdxType","originalType","parentName"]),c=m(a),N=r,g=c["".concat(p,".").concat(N)]||c[N]||s[N]||l;return a?n.createElement(g,i(i({ref:e},d),{},{components:a})):n.createElement(g,i({ref:e},d))}));function g(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,i=new Array(l);i[0]=N;var o={};for(var p in e)hasOwnProperty.call(e,p)&&(o[p]=e[p]);o.originalType=t,o[c]="string"==typeof t?t:r,i[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},i=void 0,o={unversionedId:"data-sources/fusioncatcher-json",id:"data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/fusioncatcher-json.md",tags:[],version:"current",frontMatter:{}},p=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],m={toc:p},d="wrapper";function c(t){let{components:e,...a}=t;return(0,r.kt)(d,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA oesophageal carcinomas"\n ]\n }\n 
]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known germline data sources")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data sources")))),(0,r.kt)("h4",{id:"genes"},"genes"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"first"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"second"),(0,r.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,r.kt)("td",{parentName:"tr",align:"left"},"3' gene")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are 
paralogs for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a readthrough event (both are on the same strand and there are no genes between them)")))),(0,r.kt)("h4",{id:"gene"},"gene"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. 
MSH6")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}c.isMDXComponent=!0},83468:(t,e,a)=>{a.r(e),a.d(e,{contentTitle:()=>o,default:()=>s,frontMatter:()=>i,metadata:()=>p,toc:()=>m});var n=a(87462),r=(a(67294),a(3905)),l=a(58202);const i={title:"FusionCatcher"},o=void 0,p={unversionedId:"data-sources/fusioncatcher",id:"data-sources/fusioncatcher",title:"FusionCatcher",description:"Overview",source:"@site/docs/data-sources/fusioncatcher.mdx",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/fusioncatcher.mdx",tags:[],version:"current",frontMatter:{title:"FusionCatcher"},sidebar:"docs",previous:{title:"DECIPHER",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher"},next:{title:"GERP",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp"}},m=[{value:"Overview",id:"overview",children:[],level:2},{value:"Supported Data Sources",id:"supported-data-sources",children:[{value:"Oncogenes",id:"oncogenes",children:[],level:3},{value:"Germline",id:"germline",children:[],level:3},{value:"Somatic",id:"somatic",children:[],level:3}],level:2},{value:"Gene Pair TSV File",id:"gene-pair-tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Gene TSV File",id:"gene-tsv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON 
Output",id:"json-output",children:[],level:2}],d={toc:m},c="wrapper";function s(t){let{components:e,...a}=t;return(0,r.kt)(c,(0,n.Z)({},d,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://github.com/ndaniel/fusioncatcher"},"FusionCatcher")," is a well-known tool that searches for somatic novel/known fusion genes, translocations, and/or chimeras in RNA-seq data. While FusionCatcher itself is not part of Illumina Connected Annotations, we have included a subset of their genomic databases in Illumina Connected Annotations."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Daniel Nicorici, Mihaela \u015eatalan, Henrik Edgren, Sara Kangaspeska, Astrid Murum\xe4gi, Olli Kallioniemi, Sami Virtanen, Olavi Kilkku. (2014) ",(0,r.kt)("a",{parentName:"p",href:"https://www.biorxiv.org/content/10.1101/011650v1"},"FusionCatcher \u2013 a tool for finding somatic fusion genes in paired-end RNA-sequencing data"),". 
",(0,r.kt)("em",{parentName:"p"},"bioRxiv")," 011650"))),(0,r.kt)("h2",{id:"supported-data-sources"},"Supported Data Sources"),(0,r.kt)("h3",{id:"oncogenes"},"Oncogenes"),(0,r.kt)("p",null,"The following data sources are aggregated and used to populate the ",(0,r.kt)("inlineCode",{parentName:"p"},"isOncogene")," field in the gene JSON object:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bushman"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://www.bushmanlab.org/links/genelists"},"bushmanlab.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cancer_genes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ONGENE"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S1673852716302053"},"JGG")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://ongene.bioinfo-minzhao.org"},"bioinfo-minzhao.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"oncogenes_more.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"UniProt tumor 
genes"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/49/D1/D480/6006196"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.uniprot.org/downloads"},"uniprot.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tumor_genes.txt")))),(0,r.kt)("h3",{id:"germline"},"Germline"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Illumina Connected Annotations label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"1000 Genomes Project"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0104567"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"1000genomes.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy (strong support)"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"banned.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Illumina Body Map 
2.0"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-513"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"bodymap2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CACG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.sciencedirect.com/science/article/pii/S0888754312000821"},"Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"cacg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ConjoinG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0013284"},"PLOS ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"conjoing.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy prefrontal cortex"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcmedgenomics.biomedcentral.com/articles/10.1186/s12920-016-0164-y"},"BMC Medical Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE68719"},"NCBI GEO")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cortex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Duplicated Genes Database"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0050653"},"PLOS 
ONE")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"http://dgd.genouest.org/"},"genouest.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"dgd.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"GTEx healthy tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://gtexportal.org/home/"},"gtexportal.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"gtex.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Healthy"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"healthy.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Human Protein Atlas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.mcponline.org/article/S1535-9476(20)34633-8/fulltext"},"MCP")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.ebi.ac.uk/arrayexpress/experiments/E-MTAB-1733/"},"EBI")),(0,r.kt)("td",{parentName:"tr",align:"left"},"hpa.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Babiceanu non-cancer tissues"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/44/6/2859/2499453#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-cancer_tissues.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor cell 
lines"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"non-tumor_cells.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions normal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-normal.txt")))),(0,r.kt)("h3",{id:"somatic"},"Somatic"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Illumina Connected Annotations label"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Reference"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Data"),(0,r.kt)("th",{parentName:"tr",align:"left"},"FusionCatcher filename"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Alaei-Mahabadi 18 cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.pnas.org/content/113/48/13768.long"},"PNAS")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"18cancers.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"DepMap CCLE"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://depmap.org/portal/download/"},"depmap.org")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE 
Klijn"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080"},"Nature Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nbt.3080#Sec27"},"Nature Biotechnology")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"CCLE Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"ccle3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Cancer Genome Project"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cgp.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerKB 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4kb.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerPub 
4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4pub.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"ChimerSeq 4.0"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/48/D1/D817/5611671"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.kobic.re.kr/chimerdb_mirror/download"},"kobic.re.kr")),(0,r.kt)("td",{parentName:"tr",align:"left"},"chimerdb4seq.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"COSMIC"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/47/D1/D941/5146192"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://cancer.sanger.ac.uk/cosmic/download"},"COSMIC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"cosmic.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bao gliomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genome.cshlp.org/content/24/11/1765"},"Genome Research")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"gliomas.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Known"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"known.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Mitelman 
DB"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://mitelmandatabase.isb-cgc.org"},"ISB-CGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://storage.cloud.google.com/mitelman-data-files/prod/mitelman_db.zip"},"Google Cloud")),(0,r.kt)("td",{parentName:"tr",align:"left"},"mitelman.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA oesophageal carcinomas"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature20805"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"oesophagus.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Bailey pancreatic cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.nature.com/articles/nature16965#Sec44"},"Nature")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pancreases.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"PCAWG"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2018.03.042"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://dcc.icgc.org/releases/PCAWG/transcriptome/fusion"},"ICGC")),(0,r.kt)("td",{parentName:"tr",align:"left"},"pcawg.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Robinson prostate 
cancers"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.cell.2015.05.001"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell/fulltext/S0092-8674(15)00548-6?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS0092867415005486%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"prostate_cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA"),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cancer.gov/about-nci/organization/ccg/research/structural-genomics/tcga"},"cancer.gov")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TumorFusions tumor"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://academic.oup.com/nar/article/46/D1/D1144/4584571#supplementary-data"},"NAR")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga-cancer.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA Gao"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://doi.org/10.1016/j.celrep.2018.03.050"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/cell-reports/fulltext/S2211-1247(18)30395-4?_returnURL=https%3A%2F%2Flinkinghub.elsevier.com%2Fretrieve%2Fpii%2FS2211124718303954%3Fshowall%3Dtrue#supplementaryMaterial"},"Cell")),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga2.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TCGA 
Vellichirammal"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://www.cell.com/molecular-therapy-family/nucleic-acids/fulltext/S2162-2531(20)30058-5"},"Molecular Therapy Nucleic Acids")),(0,r.kt)("td",{parentName:"tr",align:"left"}),(0,r.kt)("td",{parentName:"tr",align:"left"},"tcga3.txt")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"TICdb"),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-8-33"},"BMC Genomics")),(0,r.kt)("td",{parentName:"tr",align:"left"},(0,r.kt)("a",{parentName:"td",href:"https://genetica.unav.edu/TICdb/allseqs_TICdb.txt"},"unav.edu")),(0,r.kt)("td",{parentName:"tr",align:"left"},"ticdb.txt")))),(0,r.kt)("h2",{id:"gene-pair-tsv-file"},"Gene Pair TSV File"),(0,r.kt)("p",null,"Most of the data files in FusionCatcher are two-column TSV files containing the Ensembl gene IDs that are paired together."),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the 1000genomes.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000006210 ENSG00000102962\nENSG00000006652 ENSG00000181016\nENSG00000014138 ENSG00000149798\nENSG00000026297 ENSG00000071242\nENSG00000035499 ENSG00000155959\nENSG00000055211 ENSG00000131013\nENSG00000055332 ENSG00000179915\nENSG00000062485 ENSG00000257727\nENSG00000065978 ENSG00000166501\nENSG00000066044 ENSG00000104980\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"In Illumina Connected Annotations, we will only import a gene pair if both Ensembl gene IDs are recognized from either our GRCh37 or GRCh38 cache files."),(0,r.kt)("h2",{id:"gene-tsv-file"},"Gene TSV File"),(0,r.kt)("p",null,"Some of the data files are single-column files containing Ensembl gene IDs. 
This is commonly used in the data files representing oncogene data sources."),(0,r.kt)("h3",{id:"example-1"},"Example"),(0,r.kt)("p",null,"Here are the first few lines of the oncogenes_more.txt file:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre"},"ENSG00000000938\nENSG00000003402\nENSG00000005469\nENSG00000005884\nENSG00000006128\nENSG00000006453\nENSG00000006468\nENSG00000007350\nENSG00000008294\nENSG00000008952\n")),(0,r.kt)("h3",{id:"parsing-1"},"Parsing"),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"FusionCatcher also uses creates custom Ensembl genes (e.g. ",(0,r.kt)("inlineCode",{parentName:"p"},"ENSG09000000002"),") to handle missing Ensembl genes. 
Illumina Connected Annotations will ignore these entries since we only include the gene IDs that are currently recognized by Illumina Connected Annotations."),(0,r.kt)("p",{parentName:"div"},"I suspect that these were originally RefSeq genes and if so, we can support those directly in Illumina Connected Annotations in the future."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://sourceforge.net/projects/fusioncatcher/files/data"},"https://sourceforge.net/projects/fusioncatcher/files/data")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(l.default,{mdxType:"JSON"}))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/cd35fae7.42fd7239.js b/assets/js/cd35fae7.42fd7239.js new file mode 100644 index 00000000..2255757c --- /dev/null +++ b/assets/js/cd35fae7.42fd7239.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5490,3232],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>g});var a=n(7294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var o=a.createContext({}),p=function(e){var t=a.useContext(o),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},c=function(e){var t=p(e.components);return a.createElement(o.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var 
n=e.components,i=e.mdxType,r=e.originalType,o=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),m=p(n),u=i,g=m["".concat(o,".").concat(u)]||m[u]||d[u]||r;return n?a.createElement(g,l(l({ref:t},c),{},{components:n})):a.createElement(g,l({ref:t},c))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=u;var s={};for(var o in t)hasOwnProperty.call(t,o)&&(s[o]=t[o]);s.originalType=e,s[m]="string"==typeof e?e:i,l[1]=s;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>r,metadata:()=>s,toc:()=>o});var a=n(7462),i=(n(7294),n(3905));const r={},l=void 0,s={unversionedId:"data-sources/clinvar-json",id:"data-sources/clinvar-json",title:"clinvar-json",description:"small variants:",source:"@site/docs/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clinvar-json.md",tags:[],version:"current",frontMatter:{}},o=[],p={toc:o},c="wrapper";function m(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"small variants:")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n 
"lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"large variants:")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n "pathogenic"\n ], \n "lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n "significance":[\n "pathogenic"\n ],\n "lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"variant type")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values 
below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no assertion provided"),(0,i.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,i.kt)("li",{parentName:"ul"},"practice guideline"),(0,i.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single 
variant")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"unknown"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"germline"),(0,i.kt)("li",{parentName:"ul"},"somatic"),(0,i.kt)("li",{parentName:"ul"},"inherited"),(0,i.kt)("li",{parentName:"ul"},"paternal"),(0,i.kt)("li",{parentName:"ul"},"maternal"),(0,i.kt)("li",{parentName:"ul"},"de-novo"),(0,i.kt)("li",{parentName:"ul"},"biparental"),(0,i.kt)("li",{parentName:"ul"},"uniparental"),(0,i.kt)("li",{parentName:"ul"},"not-tested"),(0,i.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"uncertain significance"),(0,i.kt)("li",{parentName:"ul"},"not provided"),(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"drug response"),(0,i.kt)("li",{parentName:"ul"},"histocompatibility"),(0,i.kt)("li",{parentName:"ul"},"association"),(0,i.kt)("li",{parentName:"ul"},"risk factor"),(0,i.kt)("li",{parentName:"ul"},"protective"),(0,i.kt)("li",{parentName:"ul"},"affects"),(0,i.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,i.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}m.isMDXComponent=!0},1396:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>d,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(7462),i=(n(7294),n(3905)),r=n(212);const l={title:"ClinVar"},s=void 
0,o={unversionedId:"data-sources/clinvar",id:"data-sources/clinvar",title:"ClinVar",description:"Overview",source:"@site/docs/data-sources/clinvar.mdx",sourceDirName:"data-sources",slug:"/data-sources/clinvar",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clinvar.mdx",tags:[],version:"current",frontMatter:{title:"ClinVar"},sidebar:"docs",previous:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen"},next:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"RCV File",id:"rcv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Parsing Significance",id:"parsing-significance",children:[],level:4}],level:3}],level:2},{value:"VCV File",id:"vcv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[{value:"Source data files",id:"source-data-files",children:[],level:3}],level:2}],c={toc:p},m="wrapper";function d(e){let{components:t,...l}=e;return(0,i.kt)(m,(0,a.Z)({},c,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. 
ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", ",(0,i.kt)("strong",{parentName:"p"},"46"),", Issue D1, 4 January 2018, Pages D1062\u2013D1067, ",(0,i.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/nar/gkx1153"},"https://doi.org/10.1093/nar/gkx1153")))),(0,i.kt)("h2",{id:"rcv-file"},"RCV File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{target:"_blank",href:n(5902).Z},"a full RCV entry"),"."),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON 
output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ID")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3}","{3}":!0},'\n \n \n\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"LastUpdatedDate")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},'\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{5}","{5}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ReviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Phenotypes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2-8}","{2-8}":!0},'\n \n \n \n Joubert syndrome 9\n \n \n \n\n')),(0,i.kt)("p",null,'We only use the field with Type="Preferred". 
Multiple phenotypes may be reported'),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Location, Variant Type and Variant Id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3-12}","{3-12}":!0},'\n\n \n \n \n \n \n \n \n\n')),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"The variant position is extracted from the fields for their respective assemblies."),(0,i.kt)("li",{parentName:"ul"},"Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant."),(0,i.kt)("li",{parentName:"ul"},'For older records, since "start\' and "stop" fields are not always available, we use the "display_start" and "display_end" fields.'),(0,i.kt)("li",{parentName:"ul"},"If a required allele is not available, we extract it from the reference sequence."),(0,i.kt)("li",{parentName:"ul"},"Only variants having a dbSNP id are extracted."),(0,i.kt)("li",{parentName:"ul"},"Note that a ClinVar accession may have multiple variants associated with it (possible in different locations)"),(0,i.kt)("li",{parentName:"ul"},"VariantId is extracted from the MeasureSet attributes."),(0,i.kt)("li",{parentName:"ul"},"VariantType is extracted from the Measure attributes.",(0,i.kt)("div",{parentName:"li",className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"unsupported variant 
types")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"We currently don't support the following variant types:"),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"Microsatellite"),(0,i.kt)("li",{parentName:"ul"},"protein only"),(0,i.kt)("li",{parentName:"ul"},"fusion"),(0,i.kt)("li",{parentName:"ul"},"Complex"),(0,i.kt)("li",{parentName:"ul"},"Variation"),(0,i.kt)("li",{parentName:"ul"},"Translocation ")))))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"MedGen, OMIM, Orphanet IDs")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4-7}","{4-7}":!0},'\n \n \n \n \n \n \n \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"AlleleOrigins")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},"\n germline\n\n")),(0,i.kt)("p",null,"We only extract all Allele Origins from Submissions (SCV) entries."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"PubMedIds")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4,10,16,21}","{4,10,16,21}":!0},'\n \n \n 12114475\n \n \n \n LMM Criteria\n \n 24033266\n \n \n \n \n \n 9113933\n \n \n \n \n 23757202\n \n\n')),(0,i.kt)("p",null,"We only extract all Pubmed Ids from Submissions (SCV) entries."),(0,i.kt)("h4",{id:"parsing-significance"},"Parsing Significance"),(0,i.kt)("p",null,"Extracting significance(s) may involve parsing multiple fields. 
Take the following snippets into consideration."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,8,13-14}","{3,8,13-14}":!0},'\n no assertion criteria provided\n Pathogenic\n\n\n\n criteria provided, multiple submitters, no conflicts\n Pathogenic/Likely pathogenic\n\n\n\n no assertion criteria provided\n Conflicting interpretations of pathogenicity\n Pathogenic(1);Uncertain significance(1)\n\n')),(0,i.kt)("p",null,"Given the evidence, we converted the significance field into an array of strings which may be parsed out of the ",(0,i.kt)("inlineCode",{parentName:"p"},"Descriptions")," or ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," fields."),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Varying Delimiters")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The delimiters in each field may vary. Currently, the delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Description")," are ",(0,i.kt)("inlineCode",{parentName:"p"},",")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),". 
The delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," are ",(0,i.kt)("inlineCode",{parentName:"p"},";")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),"."))),(0,i.kt)("h2",{id:"vcv-file"},"VCV File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n\n\n current\n Homo sapiens\n \n \n \n \n \n 1p36.31\n \n \n \n 601142\n \n \n \n 1p36.31\n \n \n \n 607215\n \n \n GRCh37/hg19 1p36.31(chr1:6051187-6158763)\n copy number gain\n \n 1p36.31\n \n \n \n no interpretation for the single variant\n \n \n \n \n \n \n no interpretation for the single variant\n \n \n no interpretation for the single variant\n \n \n \n \n \n \n \n \n \n\n\n')),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{7}","{7}":!0},'\n \n \n \n \n \n no interpretation for the single variant\n \n \n \n \n \n\n')),(0,i.kt)("p",null,"May have multiple significances listed."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},"\n \n \n no interpretation for the single variant\n \n \n\n")),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"The XML file contains ~1k more entries (out of 162K) than the VCF file"),(0,i.kt)("li",{parentName:"ul"},"The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF"),(0,i.kt)("li",{parentName:"ul"},'The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H",\netc.) 
as their alternate allele')))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz"},"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz")),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz"},"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz")),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The ClinVar ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," and ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," for Illumina Connected Annotations can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"clinvar")," subcommand."),(0,i.kt)("h3",{id:"source-data-files"},"Source data files"),(0,i.kt)("p",null,"Two input ",(0,i.kt)("inlineCode",{parentName:"p"},".xml")," files and a ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file are required in order to build the ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," and ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," file. 
You should have the following files:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"ClinVarFullRelease_00-latest.xml.gz ClinVarVariationRelease_00-latest.xml.gz\nClinVarFullRelease_00-latest.xml.gz.version\n")),(0,i.kt)("p",null,"The version file is a text file with the follwoing format."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinVar\nVERSION=20220505\nDATE=2022-05-05\nDESCRIPTION=A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence\n")),(0,i.kt)("p",null,"The help menu for the utility is as follows:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll clinvar\n---------------------------------------------------------------------------\nSAUtils (c) 2022 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.18.1\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll clinvar [options]\nCreates a supplementary database with ClinVar annotations\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --rcv, -i ClinVar Full release XML file\n --vcv, -c ClinVar Variation release XML file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll clinvar\n")),(0,i.kt)("p",null,"Here is a sample execution:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll clinvar \\\\\n--ref ~/development/References/7/Homo_sapiens.GRCh38.Nirvana.dat --rcv ClinVarFullRelease_00-latest.xml.gz \\\\\n--vcv ClinVarVariationRelease_00-latest.xml.gz --out ~/development/SupplementaryDatabase/63/GRCh38\n---------------------------------------------------------------------------\nSAUtils (c) 2022 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 
3.18.1\n---------------------------------------------------------------------------\n\nFound 1535677 VCV records\nUnknown vcv id:225946 found in RCV000211201.2\nUnknown vcv id:225946 found in RCV000211253.2\nUnknown vcv id:225946 found in RCV000211375.2\nUnknown vcv id:976117 found in RCV001253316.1\nUnknown vcv id:1321016 found in RCV001776995.2\n3 unknown VCVs found in RCVs.\n225946,976117,1321016\n0 unknown VCVs found in RCVs.\nChromosome 1 completed in 00:00:15.1\nChromosome 2 completed in 00:00:20.0\nChromosome 3 completed in 00:00:09.7\nChromosome 4 completed in 00:00:05.9\nChromosome 5 completed in 00:00:09.8\nChromosome 6 completed in 00:00:08.3\nChromosome 7 completed in 00:00:08.7\nChromosome 8 completed in 00:00:06.2\nChromosome 9 completed in 00:00:08.6\nChromosome 10 completed in 00:00:07.0\nChromosome 11 completed in 00:00:11.7\nChromosome 12 completed in 00:00:08.0\nChromosome 13 completed in 00:00:06.3\nChromosome 14 completed in 00:00:06.0\nChromosome 15 completed in 00:00:06.6\nChromosome 16 completed in 00:00:10.8\nChromosome 17 completed in 00:00:13.8\nChromosome 18 completed in 00:00:02.9\nChromosome 19 completed in 00:00:08.7\nChromosome 20 completed in 00:00:03.6\nChromosome 21 completed in 00:00:02.4\nChromosome 22 completed in 00:00:03.6\nChromosome MT completed in 00:00:00.2\nChromosome X completed in 00:00:07.5\nChromosome Y completed in 00:00:00.0\nMaximum bp shifted for any variant:2\nWriting 37097 intervals to database...\n\nTime: 00:13:26.9\n\n")))}d.isMDXComponent=!0},5902:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/files/clinvar-rcv-example-4e0a2f2ac6c70acd0ce41410690b683b.xml"}}]); \ No newline at end of file diff --git a/assets/js/cd35fae7.aeeba7df.js b/assets/js/cd35fae7.aeeba7df.js deleted file mode 100644 index 7a394a90..00000000 --- a/assets/js/cd35fae7.aeeba7df.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5490,3232],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>g});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var o=a.createContext({}),p=function(e){var t=a.useContext(o),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},c=function(e){var t=p(e.components);return a.createElement(o.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,o=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),m=p(n),u=i,g=m["".concat(o,".").concat(u)]||m[u]||d[u]||r;return n?a.createElement(g,l(l({ref:t},c),{},{components:n})):a.createElement(g,l({ref:t},c))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=u;var s={};for(var o in t)hasOwnProperty.call(t,o)&&(s[o]=t[o]);s.originalType=e,s[m]="string"==typeof e?e:i,l[1]=s;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>r,metadata:()=>s,toc:()=>o});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,s={unversionedId:"data-sources/clinvar-json",id:"data-sources/clinvar-json",title:"clinvar-json",description:"small 
variants:",source:"@site/docs/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clinvar-json.md",tags:[],version:"current",frontMatter:{}},o=[],p={toc:o},c="wrapper";function m(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"small variants:")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"large variants:")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "chromosome":"1", \n "begin":629025, \n "end":8537745, \n "variantType":"copy_number_loss", \n "id":"RCV000051993.4", \n "variationId":"VCV000058242.1", \n "reviewStatus":"criteria provided, single submitter", \n "alleleOrigins":[\n "not provided"\n ], \n "phenotypes":[\n "See cases"\n ], \n "significance":[\n "pathogenic"\n ], \n "lastUpdatedDate":"2022-04-21", \n "pubMedIds":[\n "21844811"\n ]\n },\n {\n "id":"VCV000058242.1",\n "reviewStatus":"criteria provided, single submitter",\n "significance":[\n "pathogenic"\n ],\n 
"lastUpdatedDate":"2022-04-21"\n },\n ......\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"variant type")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no assertion provided"),(0,i.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,i.kt)("li",{parentName:"ul"},"practice 
guideline"),(0,i.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"unknown"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"germline"),(0,i.kt)("li",{parentName:"ul"},"somatic"),(0,i.kt)("li",{parentName:"ul"},"inherited"),(0,i.kt)("li",{parentName:"ul"},"paternal"),(0,i.kt)("li",{parentName:"ul"},"maternal"),(0,i.kt)("li",{parentName:"ul"},"de-novo"),(0,i.kt)("li",{parentName:"ul"},"biparental"),(0,i.kt)("li",{parentName:"ul"},"uniparental"),(0,i.kt)("li",{parentName:"ul"},"not-tested"),(0,i.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"uncertain significance"),(0,i.kt)("li",{parentName:"ul"},"not provided"),(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"drug response"),(0,i.kt)("li",{parentName:"ul"},"histocompatibility"),(0,i.kt)("li",{parentName:"ul"},"association"),(0,i.kt)("li",{parentName:"ul"},"risk factor"),(0,i.kt)("li",{parentName:"ul"},"protective"),(0,i.kt)("li",{parentName:"ul"},"affects"),(0,i.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,i.kt)("li",{parentName:"ul"},"conflicting interpretations of 
pathogenicity")))}m.isMDXComponent=!0},51396:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>d,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(87462),i=(n(67294),n(3905)),r=n(90212);const l={title:"ClinVar"},s=void 0,o={unversionedId:"data-sources/clinvar",id:"data-sources/clinvar",title:"ClinVar",description:"Overview",source:"@site/docs/data-sources/clinvar.mdx",sourceDirName:"data-sources",slug:"/data-sources/clinvar",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/clinvar.mdx",tags:[],version:"current",frontMatter:{title:"ClinVar"},sidebar:"docs",previous:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen"},next:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"RCV File",id:"rcv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Parsing Significance",id:"parsing-significance",children:[],level:4}],level:3}],level:2},{value:"VCV File",id:"vcv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[{value:"Source data files",id:"source-data-files",children:[],level:3}],level:2}],c={toc:p},m="wrapper";function d(e){let{components:t,...l}=e;return(0,i.kt)(m,(0,a.Z)({},c,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinVar is a freely accessible, public archive of reports of the relationships among human variations 
and phenotypes, with supporting evidence. ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", ",(0,i.kt)("strong",{parentName:"p"},"46"),", Issue D1, 4 January 2018, Pages D1062\u2013D1067, ",(0,i.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/nar/gkx1153"},"https://doi.org/10.1093/nar/gkx1153")))),(0,i.kt)("h2",{id:"rcv-file"},"RCV File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{target:"_blank",href:n(95902).Z},"a full RCV entry"),"."),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON 
output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ID")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3}","{3}":!0},'\n \n \n\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"LastUpdatedDate")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},'\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{5}","{5}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ReviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Phenotypes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2-8}","{2-8}":!0},'\n \n \n \n Joubert syndrome 9\n \n \n \n\n')),(0,i.kt)("p",null,'We only use the field with Type="Preferred". 
Multiple phenotypes may be reported'),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Location, Variant Type and Variant Id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3-12}","{3-12}":!0},'\n\n \n \n \n \n \n \n \n\n')),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"The variant position is extracted from the fields for their respective assemblies."),(0,i.kt)("li",{parentName:"ul"},"Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant."),(0,i.kt)("li",{parentName:"ul"},'For older records, since "start\' and "stop" fields are not always available, we use the "display_start" and "display_end" fields.'),(0,i.kt)("li",{parentName:"ul"},"If a required allele is not available, we extract it from the reference sequence."),(0,i.kt)("li",{parentName:"ul"},"Only variants having a dbSNP id are extracted."),(0,i.kt)("li",{parentName:"ul"},"Note that a ClinVar accession may have multiple variants associated with it (possible in different locations)"),(0,i.kt)("li",{parentName:"ul"},"VariantId is extracted from the MeasureSet attributes."),(0,i.kt)("li",{parentName:"ul"},"VariantType is extracted from the Measure attributes.",(0,i.kt)("div",{parentName:"li",className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"unsupported variant 
types")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"We currently don't support the following variant types:"),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"Microsatellite"),(0,i.kt)("li",{parentName:"ul"},"protein only"),(0,i.kt)("li",{parentName:"ul"},"fusion"),(0,i.kt)("li",{parentName:"ul"},"Complex"),(0,i.kt)("li",{parentName:"ul"},"Variation"),(0,i.kt)("li",{parentName:"ul"},"Translocation ")))))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"MedGen, OMIM, Orphanet IDs")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4-7}","{4-7}":!0},'\n \n \n \n \n \n \n \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"AlleleOrigins")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},"\n germline\n\n")),(0,i.kt)("p",null,"We only extract all Allele Origins from Submissions (SCV) entries."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"PubMedIds")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4,10,16,21}","{4,10,16,21}":!0},'\n \n \n 12114475\n \n \n \n LMM Criteria\n \n 24033266\n \n \n \n \n \n 9113933\n \n \n \n \n 23757202\n \n\n')),(0,i.kt)("p",null,"We only extract all Pubmed Ids from Submissions (SCV) entries."),(0,i.kt)("h4",{id:"parsing-significance"},"Parsing Significance"),(0,i.kt)("p",null,"Extracting significance(s) may involve parsing multiple fields. 
Take the following snippets into consideration."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,8,13-14}","{3,8,13-14}":!0},'\n no assertion criteria provided\n Pathogenic\n\n\n\n criteria provided, multiple submitters, no conflicts\n Pathogenic/Likely pathogenic\n\n\n\n no assertion criteria provided\n Conflicting interpretations of pathogenicity\n Pathogenic(1);Uncertain significance(1)\n\n')),(0,i.kt)("p",null,"Given the evidence, we converted the significance field into an array of strings which may be parsed out of the ",(0,i.kt)("inlineCode",{parentName:"p"},"Descriptions")," or ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," fields."),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Varying Delimiters")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The delimiters in each field may vary. Currently, the delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Description")," are ",(0,i.kt)("inlineCode",{parentName:"p"},",")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),". 
The delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," are ",(0,i.kt)("inlineCode",{parentName:"p"},";")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),"."))),(0,i.kt)("h2",{id:"vcv-file"},"VCV File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n\n\n current\n Homo sapiens\n \n \n \n \n \n 1p36.31\n \n \n \n 601142\n \n \n \n 1p36.31\n \n \n \n 607215\n \n \n GRCh37/hg19 1p36.31(chr1:6051187-6158763)\n copy number gain\n \n 1p36.31\n \n \n \n no interpretation for the single variant\n \n \n \n \n \n \n no interpretation for the single variant\n \n \n no interpretation for the single variant\n \n \n \n \n \n \n \n \n \n\n\n')),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{7}","{7}":!0},'\n \n \n \n \n \n no interpretation for the single variant\n \n \n \n \n \n\n')),(0,i.kt)("p",null,"May have multiple significances listed."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},"\n \n \n no interpretation for the single variant\n \n \n\n")),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"The XML file contains ~1k more entries (out of 162K) than the VCF file"),(0,i.kt)("li",{parentName:"ul"},"The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF"),(0,i.kt)("li",{parentName:"ul"},'The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H",\netc.) 
as their alternate allele')))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz"},"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz")),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz"},"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz")),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The ClinVar ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," and ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," for Illumina Connected Annotations can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"clinvar")," subcommand."),(0,i.kt)("h3",{id:"source-data-files"},"Source data files"),(0,i.kt)("p",null,"Two input ",(0,i.kt)("inlineCode",{parentName:"p"},".xml")," files and a ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file are required in order to build the ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," and ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," file. 
You should have the following files:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"ClinVarFullRelease_00-latest.xml.gz ClinVarVariationRelease_00-latest.xml.gz\nClinVarFullRelease_00-latest.xml.gz.version\n")),(0,i.kt)("p",null,"The version file is a text file with the follwoing format."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinVar\nVERSION=20220505\nDATE=2022-05-05\nDESCRIPTION=A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence\n")),(0,i.kt)("p",null,"The help menu for the utility is as follows:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll clinvar\n---------------------------------------------------------------------------\nSAUtils (c) 2022 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.18.1\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll clinvar [options]\nCreates a supplementary database with ClinVar annotations\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --rcv, -i ClinVar Full release XML file\n --vcv, -c ClinVar Variation release XML file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll clinvar\n")),(0,i.kt)("p",null,"Here is a sample execution:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll clinvar \\\\\n--ref ~/development/References/7/Homo_sapiens.GRCh38.Nirvana.dat --rcv ClinVarFullRelease_00-latest.xml.gz \\\\\n--vcv ClinVarVariationRelease_00-latest.xml.gz --out ~/development/SupplementaryDatabase/63/GRCh38\n---------------------------------------------------------------------------\nSAUtils (c) 2022 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 
3.18.1\n---------------------------------------------------------------------------\n\nFound 1535677 VCV records\nUnknown vcv id:225946 found in RCV000211201.2\nUnknown vcv id:225946 found in RCV000211253.2\nUnknown vcv id:225946 found in RCV000211375.2\nUnknown vcv id:976117 found in RCV001253316.1\nUnknown vcv id:1321016 found in RCV001776995.2\n3 unknown VCVs found in RCVs.\n225946,976117,1321016\n0 unknown VCVs found in RCVs.\nChromosome 1 completed in 00:00:15.1\nChromosome 2 completed in 00:00:20.0\nChromosome 3 completed in 00:00:09.7\nChromosome 4 completed in 00:00:05.9\nChromosome 5 completed in 00:00:09.8\nChromosome 6 completed in 00:00:08.3\nChromosome 7 completed in 00:00:08.7\nChromosome 8 completed in 00:00:06.2\nChromosome 9 completed in 00:00:08.6\nChromosome 10 completed in 00:00:07.0\nChromosome 11 completed in 00:00:11.7\nChromosome 12 completed in 00:00:08.0\nChromosome 13 completed in 00:00:06.3\nChromosome 14 completed in 00:00:06.0\nChromosome 15 completed in 00:00:06.6\nChromosome 16 completed in 00:00:10.8\nChromosome 17 completed in 00:00:13.8\nChromosome 18 completed in 00:00:02.9\nChromosome 19 completed in 00:00:08.7\nChromosome 20 completed in 00:00:03.6\nChromosome 21 completed in 00:00:02.4\nChromosome 22 completed in 00:00:03.6\nChromosome MT completed in 00:00:00.2\nChromosome X completed in 00:00:07.5\nChromosome Y completed in 00:00:00.0\nMaximum bp shifted for any variant:2\nWriting 37097 intervals to database...\n\nTime: 00:13:26.9\n\n")))}d.isMDXComponent=!0},95902:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/files/clinvar-rcv-example-4e0a2f2ac6c70acd0ce41410690b683b.xml"}}]); \ No newline at end of file diff --git a/assets/js/cd8220b1.2848816f.js b/assets/js/cd8220b1.2848816f.js new file mode 100644 index 00000000..da369335 --- /dev/null +++ b/assets/js/cd8220b1.2848816f.js @@ -0,0 +1 @@ +"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4246],{3905:(t,e,n)=>{n.d(e,{Zo:()=>p,kt:()=>f});var r=n(7294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var u=r.createContext({}),c=function(t){var e=r.useContext(u),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},p=function(t){var e=c(t.components);return r.createElement(u.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},s=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,l=t.originalType,u=t.parentName,p=i(t,["components","mdxType","originalType","parentName"]),d=c(n),s=a,f=d["".concat(u,".").concat(s)]||d[s]||m[s]||l;return n?r.createElement(f,o(o({ref:e},p),{},{components:n})):r.createElement(f,o({ref:e},p))}));function f(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var u in e)hasOwnProperty.call(e,u)&&(i[u]=e[u]);i.originalType=t,i[d]="string"==typeof t?t:a,o[1]=i;for(var c=2;c{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>u});var r=n(7462),a=(n(7294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/topmed-json",id:"data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/topmed-json.md",tags:[],version:"current",frontMatter:{}},u=[],c={toc:u},p="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(p,(0,r.Z)({},c,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n "failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. 
Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Illumina Connected Annotations)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allHc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/cd8220b1.ee913650.js b/assets/js/cd8220b1.ee913650.js deleted file mode 100644 index 386a7d18..00000000 --- a/assets/js/cd8220b1.ee913650.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4246],{3905:(t,e,n)=>{n.d(e,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var u=r.createContext({}),c=function(t){var e=r.useContext(u),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},p=function(t){var e=c(t.components);return r.createElement(u.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return 
r.createElement(r.Fragment,{},e)}},s=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,l=t.originalType,u=t.parentName,p=i(t,["components","mdxType","originalType","parentName"]),d=c(n),s=a,f=d["".concat(u,".").concat(s)]||d[s]||m[s]||l;return n?r.createElement(f,o(o({ref:e},p),{},{components:n})):r.createElement(f,o({ref:e},p))}));function f(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var u in e)hasOwnProperty.call(e,u)&&(i[u]=e[u]);i.originalType=t,i[d]="string"==typeof t?t:a,o[1]=i;for(var c=2;c{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>u});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/topmed-json",id:"data-sources/topmed-json",title:"topmed-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/topmed-json.md",sourceDirName:"data-sources",slug:"/data-sources/topmed-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/topmed-json.md",tags:[],version:"current",frontMatter:{}},u=[],c={toc:u},p="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(p,(0,r.Z)({},c,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"topmed":{ \n "allAc":20,\n "allAn":125568,\n "allAf":0.000159,\n "allHc":0,\n 
"failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele number. Non-zero integer.")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed allele frequency (computed by Illumina Connected Annotations)")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allHc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"TOPMed homozygous count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/cf6ff622.d7aafcfa.js b/assets/js/cf6ff622.d7aafcfa.js deleted file mode 100644 index e13670ab..00000000 --- a/assets/js/cf6ff622.d7aafcfa.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2218],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>m});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var 
n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),d=c(n),g=i,m=d["".concat(s,".").concat(g)]||d[g]||u[g]||r;return n?a.createElement(m,l(l({ref:t},p),{},{components:n})):a.createElement(m,l({ref:t},p))}));function m(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=g;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[d]="string"==typeof e?e:i,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.17/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.17/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/clingen-dosage-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],c={toc:s},p="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object 
array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/d013bea7.a21cea67.js b/assets/js/d013bea7.a21cea67.js deleted file mode 100644 index acc14519..00000000 --- a/assets/js/d013bea7.a21cea67.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4418],{3905:(e,n,t)=>{t.d(n,{Zo:()=>d,kt:()=>h});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function r(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):r(r({},n),e)),t},d=function(e){var n=c(e.components);return 
a.createElement(s.Provider,{value:n},e.children)},p="mdxType",u={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},m=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),p=c(t),m=i,h=p["".concat(s,".").concat(m)]||p[m]||u[m]||o;return t?a.createElement(h,r(r({ref:n},d),{},{components:t})):a.createElement(h,r({ref:n},d))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var o=t.length,r=new Array(o);r[0]=m;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[p]="string"==typeof e?e:i,r[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>r,default:()=>p,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=t(87462),i=(t(67294),t(3905));const o={title:"Getting Started"},r=void 0,l={unversionedId:"introduction/getting-started",id:"version-3.21/introduction/getting-started",title:"Getting Started",description:"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.",source:"@site/versioned_docs/version-3.21/introduction/getting-started.md",sourceDirName:"introduction",slug:"/introduction/getting-started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/getting-started",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/introduction/getting-started.md",tags:[],version:"3.21",frontMatter:{title:"Getting Started"},sidebar:"docs",previous:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/dependencies"},next:{title:"Parsing Nirvana JSON",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/parsing-json"}},s=[{value:"Getting Nirvana",id:"getting-nirvana",children:[{value:"Latest Release",id:"latest-release",children:[],level:3},{value:"GitHub Release Notes",id:"github-release-notes",children:[],level:3},{value:"Quick Start",id:"quick-start",children:[],level:3},{value:"Docker",id:"docker",children:[],level:3}],level:2},{value:"Downloading the data files",id:"downloading-the-data-files",children:[],level:2},{value:"Download a test VCF file",id:"download-a-test-vcf-file",children:[],level:2},{value:"Running Nirvana",id:"running-nirvana",children:[],level:2},{value:"The Nirvana command line",id:"the-nirvana-command-line",children:[{value:"Specifying annotation sources",id:"specifying-annotation-sources",children:[],level:3}],level:2}],c={toc:s},d="wrapper";function p(e){let{components:n,...o}=e;return(0,i.kt)(d,(0,a.Z)({},c,o,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"Nirvana is written in C# using ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core")," (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files."),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana currently uses .NET6.0. 
Please make sure that you have the most current runtime from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core downloads")," page."))),(0,i.kt)("h2",{id:"getting-nirvana"},"Getting Nirvana"),(0,i.kt)("h3",{id:"latest-release"},"Latest Release"),(0,i.kt)("p",null,"Contact the team to obtain the latest release."),(0,i.kt)("h3",{id:"github-release-notes"},"GitHub Release Notes"),(0,i.kt)("p",null,"Alternatively, you can grab the previous binaries from our ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/releases"},"GitHub Releases")," page:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p Nirvana/Data\ncd Nirvana\nunzip Nirvana-3.18.1-net6.0.zip\n")),(0,i.kt)("h3",{id:"quick-start"},"Quick Start"),(0,i.kt)("p",null,"If you want to get started right away, we've created ",(0,i.kt)("a",{target:"_blank",href:t(83869).Z},"a script")," that unzips the Nirvana build, downloads the annotation data, and starts annotating a test file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"bash ./TestNirvana.sh NirvanaBuild.zip\n")),(0,i.kt)("p",null,"We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X."),(0,i.kt)("h3",{id:"docker"},"Docker"),(0,i.kt)("p",null,"You can find us on ",(0,i.kt)("a",{parentName:"p",href:"https://hub.docker.com/repository/docker/annotation/nirvana"},"Docker Hub")," under ",(0,i.kt)("inlineCode",{parentName:"p"},"annotation/nirvana"),":"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 
1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"We think Docker is fantastic. However, because our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Nirvana in Docker."))),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p Nirvana/Data\ncd Nirvana\ndocker pull annotation/nirvana:3.14\n")),(0,i.kt)("p",null,"For Docker, we have special instructions for running the Downloader:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \\\n /opt/nirvana/Downloader.dll --ga GRCh37 -o /scratch\n")),(0,i.kt)("p",null,"Similarly, we have special instructions for running Nirvana (Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF")," in case you need it):"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \\\n /opt/nirvana/Nirvana.dll -c /scratch/Cache/GRCh37/Both \\\n -r /scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n --sd /scratch/SupplementaryAnnotation/GRCh37 \\\n -i /scratch/HiSeq.10000.vcf.gz -o /scratch/HiSeq\n")),(0,i.kt)("h2",{id:"downloading-the-data-files"},"Downloading the data files"),(0,i.kt)("p",null,"To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/net6.0/Downloader.dll \\\n --ga 
GRCh37 \\\n -o Data\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--ga")," argument specifies the genome assembly which can be ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh37"),", ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh38"),", or ",(0,i.kt)("inlineCode",{parentName:"li"},"both"),"."),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Glitches in the Matrix")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. 
If you see that a file was marked ",(0,i.kt)("inlineCode",{parentName:"p"},"truncated"),", try fixing the root cause and running the downloader again."))),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"From time to time, you can re-run the Downloader to get the latest annotation files. 
It will only download the files that changed."))),(0,i.kt)("h2",{id:"download-a-test-vcf-file"},"Download a test VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz\n")),(0,i.kt)("h2",{id:"running-nirvana"},"Running Nirvana"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/net6.0/Nirvana.dll \\\n -c Data/Cache \\\n --sd Data/SupplementaryAnnotation/GRCh37 \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i HiSeq.10000.vcf.gz \\\n -o HiSeq.10000\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nNirvana (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et 
al 3.21.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:00.0\nSA Position Scan 00:00:00.0 153,634\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr1 00:00:00.2 00:00:00.8 11,873\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:00.0 1.5 %\nPreload 00:00:00.2 4.9 %\nAnnotation 00:00:00.8 18.5 %\n\nTime: 00:00:04.4\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"HiSeq.10000.json.gz"),". Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.json.gz"},"the full JSON file"),"."),(0,i.kt)("h2",{id:"the-nirvana-command-line"},"The Nirvana command line"),(0,i.kt)("p",null,"The full command line options can be viewed by using the ",(0,i.kt)("inlineCode",{parentName:"p"},"-h")," option or no options"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/net6.0/Nirvana.dll\n---------------------------------------------------------------------------\nNirvana (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet Nirvana.dll -i -c --sd -r -o \nAnnotates a set of variants\n\nOPTIONS:\n --cache, -c \n input cache directory\n --in, -i input VCF path\n --out, -o output file path\n --ref, -r input compressed reference sequence path\n --sd input supplementary annotation directory\n --sources, -s annotation data sources to be used (comma\n separated list of supported tags)\n --force-mt forces to annotate mitochondrial variants\n --legacy-vids enables support for legacy VIDs\n --enable-dq report DQ from VCF 
samples field\n --enable-bidirectional-fusions\n enables support for bidirectional gene fusions\n --str user provided STR annotation TSV file\n --vcf-info additional vcf info field keys (comma separated)\n desired in the output\n --vcf-sample-info \n additional vcf format field keys (comma separated)\n desired in the output\n --help, -h displays the help menu\n --version, -v displays the version\n\nSupplementary annotation version: 69, Reference version: 7\n")),(0,i.kt)("h3",{id:"specifying-annotation-sources"},"Specifying annotation sources"),(0,i.kt)("p",null,"By default, Nirvana will use all available data sources. However, the user can customize the set of sources using the ",(0,i.kt)("inlineCode",{parentName:"p"},"--sources|-s")," option. If an unknown source is specified, a warning message will be printed."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/net6.0/Nirvana.dll \\\n -c Data/Cache/GRCh37 \\\n --sd Data/SupplementaryAnnotation/GRCh37 \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i HiSeq.10000.vcf.gz \\\n -o HiSeq.10000 \\\n -s omim,gnomad,ense\n ---------------------------------------------------------------------------\n Nirvana (c) 2023 Illumina, Inc.\n Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n ---------------------------------------------------------------------------\n\n WARNING: Unknown tag in data-sources: ense.\n Available values are: aminoAcidConservation,primateAI,dbsnp,spliceAI,revel,cosmic,clinvar,gnomad,\n mitomap,oneKg,gmeVariome,topmed,clingen,decipher,gnomAD-preview,clingenDosageSensitivityMap,\n gerpScore,dannScore,omim,clingenGeneValidity,phylopScore,lowComplexityRegion,refMinor,\n heteroplasmy,Ensembl,RefSeq\n\n Initialization Time Positions/s\n ---------------------------------------------------------------------------\n SA Position Scan 00:00:00.3 307,966\n ....\n ..\n")),(0,i.kt)("p",null,"The list of available values is compiled from the files 
provided (using ",(0,i.kt)("inlineCode",{parentName:"p"},"-c")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"--sd")," options)."))}p.isMDXComponent=!0},83869:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/files/TestNirvana-393f155ae4157b0ffbd1b7e399348477.sh"}}]); \ No newline at end of file diff --git a/assets/js/d03dbe1a.26aa2330.js b/assets/js/d03dbe1a.26aa2330.js deleted file mode 100644 index b93f2a85..00000000 --- a/assets/js/d03dbe1a.26aa2330.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9321,2137],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>g});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var o=a.createContext({}),p=function(e){var t=a.useContext(o),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},c=function(e){var t=p(e.components);return a.createElement(o.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,o=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),m=p(n),u=i,g=m["".concat(o,".").concat(u)]||m[u]||d[u]||r;return n?a.createElement(g,l(l({ref:t},c),{},{components:n})):a.createElement(g,l({ref:t},c))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=u;var s={};for(var o in 
t)hasOwnProperty.call(t,o)&&(s[o]=t[o]);s.originalType=e,s[m]="string"==typeof e?e:i,l[1]=s;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>m,frontMatter:()=>r,metadata:()=>s,toc:()=>o});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,s={unversionedId:"data-sources/clinvar-json",id:"version-3.16/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clinvar-json.md",tags:[],version:"3.16",frontMatter:{}},o=[],p={toc:o},c="wrapper";function m(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n 
}\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MedGen 
IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no assertion provided"),(0,i.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,i.kt)("li",{parentName:"ul"},"practice guideline"),(0,i.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no 
conflicts"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"unknown"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"germline"),(0,i.kt)("li",{parentName:"ul"},"somatic"),(0,i.kt)("li",{parentName:"ul"},"inherited"),(0,i.kt)("li",{parentName:"ul"},"paternal"),(0,i.kt)("li",{parentName:"ul"},"maternal"),(0,i.kt)("li",{parentName:"ul"},"de-novo"),(0,i.kt)("li",{parentName:"ul"},"biparental"),(0,i.kt)("li",{parentName:"ul"},"uniparental"),(0,i.kt)("li",{parentName:"ul"},"not-tested"),(0,i.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"uncertain significance"),(0,i.kt)("li",{parentName:"ul"},"not provided"),(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"drug response"),(0,i.kt)("li",{parentName:"ul"},"histocompatibility"),(0,i.kt)("li",{parentName:"ul"},"association"),(0,i.kt)("li",{parentName:"ul"},"risk factor"),(0,i.kt)("li",{parentName:"ul"},"protective"),(0,i.kt)("li",{parentName:"ul"},"affects"),(0,i.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,i.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}m.isMDXComponent=!0},11943:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>d,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(87462),i=(n(67294),n(3905)),r=n(48247);const l={title:"ClinVar"},s=void 
0,o={unversionedId:"data-sources/clinvar",id:"version-3.16/data-sources/clinvar",title:"ClinVar",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/clinvar.mdx",sourceDirName:"data-sources",slug:"/data-sources/clinvar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clinvar",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clinvar.mdx",tags:[],version:"3.16",frontMatter:{title:"ClinVar"},sidebar:"version-3.16/docs",previous:{title:"ClinGen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen"},next:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/cosmic"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"RCV File",id:"rcv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Parsing Significance",id:"parsing-significance",children:[],level:4}],level:3}],level:2},{value:"VCV File",id:"vcv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[{value:"Source data files",id:"source-data-files",children:[],level:3}],level:2}],c={toc:p},m="wrapper";function d(e){let{components:t,...l}=e;return(0,i.kt)(m,(0,a.Z)({},c,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. 
ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", ",(0,i.kt)("strong",{parentName:"p"},"46"),", Issue D1, 4 January 2018, Pages D1062\u2013D1067, ",(0,i.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/nar/gkx1153"},"https://doi.org/10.1093/nar/gkx1153")))),(0,i.kt)("h2",{id:"rcv-file"},"RCV File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{target:"_blank",href:n(76975).Z},"a full RCV entry"),"."),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON 
output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ID")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3}","{3}":!0},'\n \n \n\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"LastUpdatedDate")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},'\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{5}","{5}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ReviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},'\n \n \n no assertion criteria provided\n Pathogenic\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Phenotypes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2-8}","{2-8}":!0},'\n \n \n \n Joubert syndrome 9\n \n \n \n\n')),(0,i.kt)("p",null,'We only use the field with Type="Preferred". 
Multiple phenotypes may be reported'),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Location and Variant Id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,5-12}","{3,5-12}":!0},'\n\n \n \n \n \n \n \n \n\n')),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"The variant position is extracted from the fields for their respective assemblies."),(0,i.kt)("li",{parentName:"ul"},"Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant."),(0,i.kt)("li",{parentName:"ul"},'For older records, since "start\' and "stop" fields are not always available, we use the "display_start" and "display_end" fields.'),(0,i.kt)("li",{parentName:"ul"},"If a required allele is not available, we extract it from the reference sequence."),(0,i.kt)("li",{parentName:"ul"},"Only variants having a dbSNP id are extracted."),(0,i.kt)("li",{parentName:"ul"},"Note that a ClinVar accession may have multiple variants associated with it (possible in different locations)"),(0,i.kt)("li",{parentName:"ul"},"VariantId is extracted from the MeasureSet attributes.")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"MedGen, OMIM, Orphanet IDs")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4-7}","{4-7}":!0},'\n \n \n \n \n \n \n \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"AlleleOrigins")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},"\n germline\n\n")),(0,i.kt)("p",null,"We only extract all Allele Origins from Submissions (SCV) entries."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"PubMedIds")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4,10,16,21}","{4,10,16,21}":!0},'\n \n \n 12114475\n \n \n \n LMM Criteria\n \n 24033266\n \n \n \n \n \n 9113933\n \n \n \n \n 23757202\n 
\n\n')),(0,i.kt)("p",null,"We only extract all Pubmed Ids from Submissions (SCV) entries."),(0,i.kt)("h4",{id:"parsing-significance"},"Parsing Significance"),(0,i.kt)("p",null,"Extracting significance(s) may involve parsing multiple fields. Take the following snippets into consideration."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,8,13-14}","{3,8,13-14}":!0},'\n no assertion criteria provided\n Pathogenic\n\n\n\n criteria provided, multiple submitters, no conflicts\n Pathogenic/Likely pathogenic\n\n\n\n no assertion criteria provided\n Conflicting interpretations of pathogenicity\n Pathogenic(1);Uncertain significance(1)\n\n')),(0,i.kt)("p",null,"Given the evidence, we converted the significance field into an array of strings which may be parsed out of the ",(0,i.kt)("inlineCode",{parentName:"p"},"Descriptions")," or ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," fields."),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Varying Delimiters")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The delimiters in each field may vary. Currently, the delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Description")," are ",(0,i.kt)("inlineCode",{parentName:"p"},",")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),". 
The delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," are ",(0,i.kt)("inlineCode",{parentName:"p"},";")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),"."))),(0,i.kt)("h2",{id:"vcv-file"},"VCV File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n\n\n current\n Homo sapiens\n \n \n \n \n \n 1p36.31\n \n \n \n 601142\n \n \n \n 1p36.31\n \n \n \n 607215\n \n \n GRCh37/hg19 1p36.31(chr1:6051187-6158763)\n copy number gain\n \n 1p36.31\n \n \n \n no interpretation for the single variant\n \n \n \n \n \n \n no interpretation for the single variant\n \n \n no interpretation for the single variant\n \n \n \n \n \n \n \n \n \n\n\n')),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{7}","{7}":!0},'\n \n \n \n \n \n no interpretation for the single variant\n \n \n \n \n \n\n')),(0,i.kt)("p",null,"May have multiple significances listed."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},"\n \n \n no interpretation for the single variant\n \n \n\n")),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"The XML file contains ~1k more entries (out of 162K) than the VCF file"),(0,i.kt)("li",{parentName:"ul"},"The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF"),(0,i.kt)("li",{parentName:"ul"},'The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H",\netc.) 
as their alternate allele')))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz"},"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz")),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz"},"https://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/clinvar_variation/ClinVarVariationRelease_00-latest.xml.gz")),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"JSON"}),(0,i.kt)("h2",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The ClinVar ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," for Nirvana can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"clinvar")," subcommand."),(0,i.kt)("h3",{id:"source-data-files"},"Source data files"),(0,i.kt)("p",null,"Two input ",(0,i.kt)("inlineCode",{parentName:"p"},".xml")," files and a ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file are required in order to build the ",(0,i.kt)("inlineCode",{parentName:"p"},".nsa")," file. 
You should have the following files:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"ClinVarFullRelease_2021-06.xml.gz ClinVarVariationRelease_2021-06.xml.gz\nClinVarFullRelease_2021-06.xml.gz.version\n")),(0,i.kt)("p",null,"The version file is a text file with the follwoing format."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinVar\nVERSION=20210603\nDATE=2021-06-03\nDESCRIPTION=A freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence\n")),(0,i.kt)("p",null,"The help menu for the utility is as follows:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet SAUtils.dll clinvar\n---------------------------------------------------------------------------\nSAUtils (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.15.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll clinvar [options]\nCreates a supplementary database with ClinVar annotations\n\nOPTIONS:\n --ref, -r compressed reference sequence file\n --rcv, -i ClinVar Full release XML file\n --vcv, -c ClinVar Variation release XML file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet SAUtils.dll clinvar\n")),(0,i.kt)("p",null,"Here is a sample execution:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet ~/development/Nirvana/bin/Debug/netcoreapp3.1/SAUtils.dll clinvar \\\\\n--ref ~/development/References/7/Homo_sapiens.GRCh38.Nirvana.dat --rcv ClinVarFullRelease_2021-06.xml.gz \\\\\n--vcv ClinVarVariationRelease_2021-06.xml.gz --out ~/development/SupplementaryDatabase/63/GRCh38\n---------------------------------------------------------------------------\nSAUtils (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 
3.13.0\n---------------------------------------------------------------------------\n\nFound 983417 VCV records\nChromosome 1 completed in 00:09:46.2\nChromosome 2 completed in 00:00:16.4\nChromosome 3 completed in 00:00:06.9\nUnknown vcv id:982521 found in RCV001262095.1\nChromosome 4 completed in 00:00:03.9\nChromosome 5 completed in 00:00:07.1\nChromosome 6 completed in 00:00:05.7\nChromosome 7 completed in 00:00:06.6\nUnknown vcv id:430873 found in RCV000493222.1\nChromosome 8 completed in 00:00:04.6\nChromosome 9 completed in 00:00:06.2\nChromosome 10 completed in 00:00:05.6\nChromosome 11 completed in 00:00:10.2\nChromosome 12 completed in 00:00:06.9\nChromosome 13 completed in 00:00:05.9\nChromosome 14 completed in 00:00:04.9\nChromosome 15 completed in 00:00:05.4\nChromosome 16 completed in 00:00:08.9\nChromosome 17 completed in 00:00:13.1\nChromosome 18 completed in 00:00:02.4\nChromosome 19 completed in 00:00:07.6\nChromosome 20 completed in 00:00:02.4\nChromosome 21 completed in 00:00:01.6\nChromosome 22 completed in 00:00:02.6\nChromosome MT completed in 00:00:00.3\nChromosome X completed in 00:00:05.5\n2 unknown VCVs found in RCVs.\n982521,430873\nChromosome Y completed in 00:00:00.0\n\nTime: 00:12:08.2\n\n")))}d.isMDXComponent=!0},76975:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/files/clinvar-rcv-example-4e0a2f2ac6c70acd0ce41410690b683b.xml"}}]); \ No newline at end of file diff --git a/assets/js/d0cd84a1.f023e02d.js b/assets/js/d0cd84a1.f023e02d.js new file mode 100644 index 00000000..49c1f53d --- /dev/null +++ b/assets/js/d0cd84a1.f023e02d.js @@ -0,0 +1,2 @@ +/*! 
For license information please see d0cd84a1.f023e02d.js.LICENSE.txt */ +(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8955],{7331:e=>{function t(){this._events=this._events||{},this._maxListeners=this._maxListeners||void 0}function r(e){return"function"==typeof e}function n(e){return"object"==typeof e&&null!==e}function i(e){return void 0===e}e.exports=t,t.prototype._events=void 0,t.prototype._maxListeners=void 0,t.defaultMaxListeners=10,t.prototype.setMaxListeners=function(e){if("number"!=typeof e||e<0||isNaN(e))throw TypeError("n must be a positive number");return this._maxListeners=e,this},t.prototype.emit=function(e){var t,a,s,c,u,o;if(this._events||(this._events={}),"error"===e&&(!this._events.error||n(this._events.error)&&!this._events.error.length)){if((t=arguments[1])instanceof Error)throw t;var h=new Error('Uncaught, unspecified "error" event. ('+t+")");throw h.context=t,h}if(i(a=this._events[e]))return!1;if(r(a))switch(arguments.length){case 1:a.call(this);break;case 2:a.call(this,arguments[1]);break;case 3:a.call(this,arguments[1],arguments[2]);break;default:c=Array.prototype.slice.call(arguments,1),a.apply(this,c)}else if(n(a))for(c=Array.prototype.slice.call(arguments,1),s=(o=a.slice()).length,u=0;u0&&this._events[e].length>s&&(this._events[e].warned=!0,console.error("(node) warning: possible EventEmitter memory leak detected. %d listeners added. 
Use emitter.setMaxListeners() to increase limit.",this._events[e].length),"function"==typeof console.trace&&console.trace()),this},t.prototype.on=t.prototype.addListener,t.prototype.once=function(e,t){if(!r(t))throw TypeError("listener must be a function");var n=!1;function i(){this.removeListener(e,i),n||(n=!0,t.apply(this,arguments))}return i.listener=t,this.on(e,i),this},t.prototype.removeListener=function(e,t){var i,a,s,c;if(!r(t))throw TypeError("listener must be a function");if(!this._events||!this._events[e])return this;if(s=(i=this._events[e]).length,a=-1,i===t||r(i.listener)&&i.listener===t)delete this._events[e],this._events.removeListener&&this.emit("removeListener",e,t);else if(n(i)){for(c=s;c-- >0;)if(i[c]===t||i[c].listener&&i[c].listener===t){a=c;break}if(a<0)return this;1===i.length?(i.length=0,delete this._events[e]):i.splice(a,1),this._events.removeListener&&this.emit("removeListener",e,t)}return this},t.prototype.removeAllListeners=function(e){var t,n;if(!this._events)return this;if(!this._events.removeListener)return 0===arguments.length?this._events={}:this._events[e]&&delete this._events[e],this;if(0===arguments.length){for(t in this._events)"removeListener"!==t&&this.removeAllListeners(t);return this.removeAllListeners("removeListener"),this._events={},this}if(r(n=this._events[e]))this.removeListener(e,n);else if(n)for(;n.length;)this.removeListener(e,n[n.length-1]);return delete this._events[e],this},t.prototype.listeners=function(e){return this._events&&this._events[e]?r(this._events[e])?[this._events[e]]:this._events[e].slice():[]},t.prototype.listenerCount=function(e){if(this._events){var t=this._events[e];if(r(t))return 1;if(t)return t.length}return 0},t.listenerCount=function(e,t){return e.listenerCount(t)}},8131:(e,t,r)=>{"use strict";var n=r(9374),i=r(7775),a=r(3076);function s(e,t,r){return new n(e,t,r)}s.version=r(4336),s.AlgoliaSearchHelper=n,s.SearchParameters=i,s.SearchResults=a,e.exports=s},8078:(e,t,r)=>{"use strict";var 
n=r(7331);function i(e,t){this.main=e,this.fn=t,this.lastResults=null}r(4853)(i,n),i.prototype.detach=function(){this.removeAllListeners(),this.main.detachDerivedHelper(this)},i.prototype.getModifiedState=function(e){return this.fn(e)},e.exports=i},2437:(e,t,r)=>{"use strict";var n=r(2344),i=r(116),a=r(9803),s={addRefinement:function(e,t,r){if(s.isRefined(e,t,r))return e;var i=""+r,a=e[t]?e[t].concat(i):[i],c={};return c[t]=a,n({},c,e)},removeRefinement:function(e,t,r){if(void 0===r)return s.clearRefinement(e,(function(e,r){return t===r}));var n=""+r;return s.clearRefinement(e,(function(e,r){return t===r&&n===e}))},toggleRefinement:function(e,t,r){if(void 0===r)throw new Error("toggleRefinement should be used with a value");return s.isRefined(e,t,r)?s.removeRefinement(e,t,r):s.addRefinement(e,t,r)},clearRefinement:function(e,t,r){if(void 0===t)return i(e)?{}:e;if("string"==typeof t)return a(e,[t]);if("function"==typeof t){var n=!1,s=Object.keys(e).reduce((function(i,a){var s=e[a]||[],c=s.filter((function(e){return!t(e,a,r)}));return c.length!==s.length&&(n=!0),i[a]=c,i}),{});return n?s:e}},isRefined:function(e,t,r){var n=Boolean(e[t])&&e[t].length>0;if(void 0===r||!n)return n;var i=""+r;return-1!==e[t].indexOf(i)}};e.exports=s},7775:(e,t,r)=>{"use strict";var n=r(2344),i=r(7888),a=r(2686),s=r(185),c=r(116),u=r(9803),o=r(8023),h=r(6801),f=r(2437);function l(e,t){return Array.isArray(e)&&Array.isArray(t)?e.length===t.length&&e.every((function(e,r){return l(t[r],e)})):e===t}function m(e){var t=e?m._parseNumbers(e):{};void 0===t.userToken||h(t.userToken)||console.warn("[algoliasearch-helper] The `userToken` parameter is invalid. 
This can lead to wrong analytics.\n - Format: [a-zA-Z0-9_-]{1,64}"),this.facets=t.facets||[],this.disjunctiveFacets=t.disjunctiveFacets||[],this.hierarchicalFacets=t.hierarchicalFacets||[],this.facetsRefinements=t.facetsRefinements||{},this.facetsExcludes=t.facetsExcludes||{},this.disjunctiveFacetsRefinements=t.disjunctiveFacetsRefinements||{},this.numericRefinements=t.numericRefinements||{},this.tagRefinements=t.tagRefinements||[],this.hierarchicalFacetsRefinements=t.hierarchicalFacetsRefinements||{};var r=this;Object.keys(t).forEach((function(e){var n=-1!==m.PARAMETERS.indexOf(e),i=void 0!==t[e];!n&&i&&(r[e]=t[e])}))}m.PARAMETERS=Object.keys(new m),m._parseNumbers=function(e){if(e instanceof m)return e;var t={};if(["aroundPrecision","aroundRadius","getRankingInfo","minWordSizefor2Typos","minWordSizefor1Typo","page","maxValuesPerFacet","distinct","minimumAroundRadius","hitsPerPage","minProximity"].forEach((function(r){var n=e[r];if("string"==typeof n){var i=parseFloat(n);t[r]=isNaN(i)?n:i}})),Array.isArray(e.insideBoundingBox)&&(t.insideBoundingBox=e.insideBoundingBox.map((function(e){return Array.isArray(e)?e.map((function(e){return parseFloat(e)})):e}))),e.numericRefinements){var r={};Object.keys(e.numericRefinements).forEach((function(t){var n=e.numericRefinements[t]||{};r[t]={},Object.keys(n).forEach((function(e){var i=n[e].map((function(e){return Array.isArray(e)?e.map((function(e){return"string"==typeof e?parseFloat(e):e})):"string"==typeof e?parseFloat(e):e}));r[t][e]=i}))})),t.numericRefinements=r}return s({},e,t)},m.make=function(e){var t=new m(e);return(e.hierarchicalFacets||[]).forEach((function(e){if(e.rootPath){var r=t.getHierarchicalRefinement(e.name);r.length>0&&0!==r[0].indexOf(e.rootPath)&&(t=t.clearRefinements(e.name)),0===(r=t.getHierarchicalRefinement(e.name)).length&&(t=t.toggleHierarchicalFacetRefinement(e.name,e.rootPath))}})),t},m.validate=function(e,t){var r=t||{};return e.tagFilters&&r.tagRefinements&&r.tagRefinements.length>0?new 
Error("[Tags] Cannot switch from the managed tag API to the advanced API. It is probably an error, if it is really what you want, you should first clear the tags with clearTags method."):e.tagRefinements.length>0&&r.tagFilters?new Error("[Tags] Cannot switch from the advanced tag API to the managed API. It is probably an error, if it is not, you should first clear the tags with clearTags method."):e.numericFilters&&r.numericRefinements&&c(r.numericRefinements)?new Error("[Numeric filters] Can't switch from the advanced to the managed API. It is probably an error, if this is really what you want, you have to first clear the numeric filters."):c(e.numericRefinements)&&r.numericFilters?new Error("[Numeric filters] Can't switch from the managed API to the advanced. It is probably an error, if this is really what you want, you have to first clear the numeric filters."):null},m.prototype={constructor:m,clearRefinements:function(e){var t={numericRefinements:this._clearNumericRefinements(e),facetsRefinements:f.clearRefinement(this.facetsRefinements,e,"conjunctiveFacet"),facetsExcludes:f.clearRefinement(this.facetsExcludes,e,"exclude"),disjunctiveFacetsRefinements:f.clearRefinement(this.disjunctiveFacetsRefinements,e,"disjunctiveFacet"),hierarchicalFacetsRefinements:f.clearRefinement(this.hierarchicalFacetsRefinements,e,"hierarchicalFacet")};return t.numericRefinements===this.numericRefinements&&t.facetsRefinements===this.facetsRefinements&&t.facetsExcludes===this.facetsExcludes&&t.disjunctiveFacetsRefinements===this.disjunctiveFacetsRefinements&&t.hierarchicalFacetsRefinements===this.hierarchicalFacetsRefinements?this:this.setQueryParameters(t)},clearTags:function(){return void 0===this.tagFilters&&0===this.tagRefinements.length?this:this.setQueryParameters({tagFilters:void 0,tagRefinements:[]})},setIndex:function(e){return e===this.index?this:this.setQueryParameters({index:e})},setQuery:function(e){return 
e===this.query?this:this.setQueryParameters({query:e})},setPage:function(e){return e===this.page?this:this.setQueryParameters({page:e})},setFacets:function(e){return this.setQueryParameters({facets:e})},setDisjunctiveFacets:function(e){return this.setQueryParameters({disjunctiveFacets:e})},setHitsPerPage:function(e){return this.hitsPerPage===e?this:this.setQueryParameters({hitsPerPage:e})},setTypoTolerance:function(e){return this.typoTolerance===e?this:this.setQueryParameters({typoTolerance:e})},addNumericRefinement:function(e,t,r){var n=o(r);if(this.isNumericRefined(e,t,n))return this;var i=s({},this.numericRefinements);return i[e]=s({},i[e]),i[e][t]?(i[e][t]=i[e][t].slice(),i[e][t].push(n)):i[e][t]=[n],this.setQueryParameters({numericRefinements:i})},getConjunctiveRefinements:function(e){return this.isConjunctiveFacet(e)&&this.facetsRefinements[e]||[]},getDisjunctiveRefinements:function(e){return this.isDisjunctiveFacet(e)&&this.disjunctiveFacetsRefinements[e]||[]},getHierarchicalRefinement:function(e){return this.hierarchicalFacetsRefinements[e]||[]},getExcludeRefinements:function(e){return this.isConjunctiveFacet(e)&&this.facetsExcludes[e]||[]},removeNumericRefinement:function(e,t,r){var n=r;return void 0!==n?this.isNumericRefined(e,t,n)?this.setQueryParameters({numericRefinements:this._clearNumericRefinements((function(r,i){return i===e&&r.op===t&&l(r.val,o(n))}))}):this:void 0!==t?this.isNumericRefined(e,t)?this.setQueryParameters({numericRefinements:this._clearNumericRefinements((function(r,n){return n===e&&r.op===t}))}):this:this.isNumericRefined(e)?this.setQueryParameters({numericRefinements:this._clearNumericRefinements((function(t,r){return r===e}))}):this},getNumericRefinements:function(e){return this.numericRefinements[e]||{}},getNumericRefinement:function(e,t){return this.numericRefinements[e]&&this.numericRefinements[e][t]},_clearNumericRefinements:function(e){if(void 0===e)return 
c(this.numericRefinements)?{}:this.numericRefinements;if("string"==typeof e)return u(this.numericRefinements,[e]);if("function"==typeof e){var t=!1,r=this.numericRefinements,n=Object.keys(r).reduce((function(n,i){var a=r[i],s={};return a=a||{},Object.keys(a).forEach((function(r){var n=a[r]||[],c=[];n.forEach((function(t){e({val:t,op:r},i,"numeric")||c.push(t)})),c.length!==n.length&&(t=!0),s[r]=c})),n[i]=s,n}),{});return t?n:this.numericRefinements}},addFacet:function(e){return this.isConjunctiveFacet(e)?this:this.setQueryParameters({facets:this.facets.concat([e])})},addDisjunctiveFacet:function(e){return this.isDisjunctiveFacet(e)?this:this.setQueryParameters({disjunctiveFacets:this.disjunctiveFacets.concat([e])})},addHierarchicalFacet:function(e){if(this.isHierarchicalFacet(e.name))throw new Error("Cannot declare two hierarchical facets with the same name: `"+e.name+"`");return this.setQueryParameters({hierarchicalFacets:this.hierarchicalFacets.concat([e])})},addFacetRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return f.isRefined(this.facetsRefinements,e,t)?this:this.setQueryParameters({facetsRefinements:f.addRefinement(this.facetsRefinements,e,t)})},addExcludeRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return f.isRefined(this.facetsExcludes,e,t)?this:this.setQueryParameters({facetsExcludes:f.addRefinement(this.facetsExcludes,e,t)})},addDisjunctiveFacetRefinement:function(e,t){if(!this.isDisjunctiveFacet(e))throw new Error(e+" is not defined in the disjunctiveFacets attribute of the helper configuration");return f.isRefined(this.disjunctiveFacetsRefinements,e,t)?this:this.setQueryParameters({disjunctiveFacetsRefinements:f.addRefinement(this.disjunctiveFacetsRefinements,e,t)})},addTagRefinement:function(e){if(this.isTagRefined(e))return this;var 
t={tagRefinements:this.tagRefinements.concat(e)};return this.setQueryParameters(t)},removeFacet:function(e){return this.isConjunctiveFacet(e)?this.clearRefinements(e).setQueryParameters({facets:this.facets.filter((function(t){return t!==e}))}):this},removeDisjunctiveFacet:function(e){return this.isDisjunctiveFacet(e)?this.clearRefinements(e).setQueryParameters({disjunctiveFacets:this.disjunctiveFacets.filter((function(t){return t!==e}))}):this},removeHierarchicalFacet:function(e){return this.isHierarchicalFacet(e)?this.clearRefinements(e).setQueryParameters({hierarchicalFacets:this.hierarchicalFacets.filter((function(t){return t.name!==e}))}):this},removeFacetRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return f.isRefined(this.facetsRefinements,e,t)?this.setQueryParameters({facetsRefinements:f.removeRefinement(this.facetsRefinements,e,t)}):this},removeExcludeRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return f.isRefined(this.facetsExcludes,e,t)?this.setQueryParameters({facetsExcludes:f.removeRefinement(this.facetsExcludes,e,t)}):this},removeDisjunctiveFacetRefinement:function(e,t){if(!this.isDisjunctiveFacet(e))throw new Error(e+" is not defined in the disjunctiveFacets attribute of the helper configuration");return f.isRefined(this.disjunctiveFacetsRefinements,e,t)?this.setQueryParameters({disjunctiveFacetsRefinements:f.removeRefinement(this.disjunctiveFacetsRefinements,e,t)}):this},removeTagRefinement:function(e){if(!this.isTagRefined(e))return this;var t={tagRefinements:this.tagRefinements.filter((function(t){return t!==e}))};return this.setQueryParameters(t)},toggleRefinement:function(e,t){return this.toggleFacetRefinement(e,t)},toggleFacetRefinement:function(e,t){if(this.isHierarchicalFacet(e))return 
this.toggleHierarchicalFacetRefinement(e,t);if(this.isConjunctiveFacet(e))return this.toggleConjunctiveFacetRefinement(e,t);if(this.isDisjunctiveFacet(e))return this.toggleDisjunctiveFacetRefinement(e,t);throw new Error("Cannot refine the undeclared facet "+e+"; it should be added to the helper options facets, disjunctiveFacets or hierarchicalFacets")},toggleConjunctiveFacetRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return this.setQueryParameters({facetsRefinements:f.toggleRefinement(this.facetsRefinements,e,t)})},toggleExcludeFacetRefinement:function(e,t){if(!this.isConjunctiveFacet(e))throw new Error(e+" is not defined in the facets attribute of the helper configuration");return this.setQueryParameters({facetsExcludes:f.toggleRefinement(this.facetsExcludes,e,t)})},toggleDisjunctiveFacetRefinement:function(e,t){if(!this.isDisjunctiveFacet(e))throw new Error(e+" is not defined in the disjunctiveFacets attribute of the helper configuration");return this.setQueryParameters({disjunctiveFacetsRefinements:f.toggleRefinement(this.disjunctiveFacetsRefinements,e,t)})},toggleHierarchicalFacetRefinement:function(e,t){if(!this.isHierarchicalFacet(e))throw new Error(e+" is not defined in the hierarchicalFacets attribute of the helper configuration");var r=this._getHierarchicalFacetSeparator(this.getHierarchicalFacetByName(e)),i={};return void 0!==this.hierarchicalFacetsRefinements[e]&&this.hierarchicalFacetsRefinements[e].length>0&&(this.hierarchicalFacetsRefinements[e][0]===t||0===this.hierarchicalFacetsRefinements[e][0].indexOf(t+r))?-1===t.indexOf(r)?i[e]=[]:i[e]=[t.slice(0,t.lastIndexOf(r))]:i[e]=[t],this.setQueryParameters({hierarchicalFacetsRefinements:n({},i,this.hierarchicalFacetsRefinements)})},addHierarchicalFacetRefinement:function(e,t){if(this.isHierarchicalFacetRefined(e))throw new Error(e+" is already refined.");if(!this.isHierarchicalFacet(e))throw new Error(e+" 
is not defined in the hierarchicalFacets attribute of the helper configuration.");var r={};return r[e]=[t],this.setQueryParameters({hierarchicalFacetsRefinements:n({},r,this.hierarchicalFacetsRefinements)})},removeHierarchicalFacetRefinement:function(e){if(!this.isHierarchicalFacetRefined(e))return this;var t={};return t[e]=[],this.setQueryParameters({hierarchicalFacetsRefinements:n({},t,this.hierarchicalFacetsRefinements)})},toggleTagRefinement:function(e){return this.isTagRefined(e)?this.removeTagRefinement(e):this.addTagRefinement(e)},isDisjunctiveFacet:function(e){return this.disjunctiveFacets.indexOf(e)>-1},isHierarchicalFacet:function(e){return void 0!==this.getHierarchicalFacetByName(e)},isConjunctiveFacet:function(e){return this.facets.indexOf(e)>-1},isFacetRefined:function(e,t){return!!this.isConjunctiveFacet(e)&&f.isRefined(this.facetsRefinements,e,t)},isExcludeRefined:function(e,t){return!!this.isConjunctiveFacet(e)&&f.isRefined(this.facetsExcludes,e,t)},isDisjunctiveFacetRefined:function(e,t){return!!this.isDisjunctiveFacet(e)&&f.isRefined(this.disjunctiveFacetsRefinements,e,t)},isHierarchicalFacetRefined:function(e,t){if(!this.isHierarchicalFacet(e))return!1;var r=this.getHierarchicalRefinement(e);return t?-1!==r.indexOf(t):r.length>0},isNumericRefined:function(e,t,r){if(void 0===r&&void 0===t)return Boolean(this.numericRefinements[e]);var n=this.numericRefinements[e]&&void 0!==this.numericRefinements[e][t];if(void 0===r||!n)return n;var a,s,c=o(r),u=void 0!==(a=this.numericRefinements[e][t],s=c,i(a,(function(e){return l(e,s)})));return n&&u},isTagRefined:function(e){return-1!==this.tagRefinements.indexOf(e)},getRefinedDisjunctiveFacets:function(){var e=this,t=a(Object.keys(this.numericRefinements).filter((function(t){return Object.keys(e.numericRefinements[t]).length>0})),this.disjunctiveFacets);return Object.keys(this.disjunctiveFacetsRefinements).filter((function(t){return 
e.disjunctiveFacetsRefinements[t].length>0})).concat(t).concat(this.getRefinedHierarchicalFacets()).sort()},getRefinedHierarchicalFacets:function(){var e=this;return a(this.hierarchicalFacets.map((function(e){return e.name})),Object.keys(this.hierarchicalFacetsRefinements).filter((function(t){return e.hierarchicalFacetsRefinements[t].length>0}))).sort()},getUnrefinedDisjunctiveFacets:function(){var e=this.getRefinedDisjunctiveFacets();return this.disjunctiveFacets.filter((function(t){return-1===e.indexOf(t)}))},managedParameters:["index","facets","disjunctiveFacets","facetsRefinements","hierarchicalFacets","facetsExcludes","disjunctiveFacetsRefinements","numericRefinements","tagRefinements","hierarchicalFacetsRefinements"],getQueryParams:function(){var e=this.managedParameters,t={},r=this;return Object.keys(this).forEach((function(n){var i=r[n];-1===e.indexOf(n)&&void 0!==i&&(t[n]=i)})),t},setQueryParameter:function(e,t){if(this[e]===t)return this;var r={};return r[e]=t,this.setQueryParameters(r)},setQueryParameters:function(e){if(!e)return this;var t=m.validate(this,e);if(t)throw t;var r=this,n=m._parseNumbers(e),i=Object.keys(this).reduce((function(e,t){return e[t]=r[t],e}),{}),a=Object.keys(n).reduce((function(e,t){var r=void 0!==e[t],i=void 0!==n[t];return r&&!i?u(e,[t]):(i&&(e[t]=n[t]),e)}),i);return new this.constructor(a)},resetPage:function(){return void 0===this.page?this:this.setPage(0)},_getHierarchicalFacetSortBy:function(e){return e.sortBy||["isRefined:desc","name:asc"]},_getHierarchicalFacetSeparator:function(e){return e.separator||" > "},_getHierarchicalRootPath:function(e){return e.rootPath||null},_getHierarchicalShowParentLevel:function(e){return"boolean"!=typeof e.showParentLevel||e.showParentLevel},getHierarchicalFacetByName:function(e){return i(this.hierarchicalFacets,(function(t){return t.name===e}))},getHierarchicalFacetBreadcrumb:function(e){if(!this.isHierarchicalFacet(e))return[];var t=this.getHierarchicalRefinement(e)[0];if(!t)return[];var 
r=this._getHierarchicalFacetSeparator(this.getHierarchicalFacetByName(e));return t.split(r).map((function(e){return e.trim()}))},toString:function(){return JSON.stringify(this,null,2)}},e.exports=m},210:(e,t,r)=>{"use strict";e.exports=function(e){return function(t,r){var n=e.hierarchicalFacets[r],o=e.hierarchicalFacetsRefinements[n.name]&&e.hierarchicalFacetsRefinements[n.name][0]||"",h=e._getHierarchicalFacetSeparator(n),f=e._getHierarchicalRootPath(n),l=e._getHierarchicalShowParentLevel(n),m=a(e._getHierarchicalFacetSortBy(n)),d=t.every((function(e){return e.exhaustive})),p=function(e,t,r,n,a){return function(o,h,f){var l=o;if(f>0){var m=0;for(l=o;m{"use strict";var n=r(4587),i=r(2344),a=r(4039),s=r(7888),c=r(9725),u=r(2293),o=r(185),h=r(2148),f=a.escapeFacetValue,l=a.unescapeFacetValue,m=r(210);function d(e){var t={};return e.forEach((function(e,r){t[e]=r})),t}function p(e,t,r){t&&t[r]&&(e.stats=t[r])}function v(e,t,r){var a=t[0];this._rawResults=t;var u=this;Object.keys(a).forEach((function(e){u[e]=a[e]})),Object.keys(r||{}).forEach((function(e){u[e]=r[e]})),this.processingTimeMS=t.reduce((function(e,t){return void 0===t.processingTimeMS?e:e+t.processingTimeMS}),0),this.disjunctiveFacets=[],this.hierarchicalFacets=e.hierarchicalFacets.map((function(){return[]})),this.facets=[];var h=e.getRefinedDisjunctiveFacets(),f=d(e.facets),v=d(e.disjunctiveFacets),g=1,y=a.facets||{};Object.keys(y).forEach((function(t){var r,n,i=y[t],o=(r=e.hierarchicalFacets,n=t,s(r,(function(e){return(e.attributes||[]).indexOf(n)>-1})));if(o){var h=o.attributes.indexOf(t),l=c(e.hierarchicalFacets,(function(e){return e.name===o.name}));u.hierarchicalFacets[l][h]={attribute:t,data:i,exhaustive:a.exhaustiveFacetsCount}}else{var 
m,d=-1!==e.disjunctiveFacets.indexOf(t),g=-1!==e.facets.indexOf(t);d&&(m=v[t],u.disjunctiveFacets[m]={name:t,data:i,exhaustive:a.exhaustiveFacetsCount},p(u.disjunctiveFacets[m],a.facets_stats,t)),g&&(m=f[t],u.facets[m]={name:t,data:i,exhaustive:a.exhaustiveFacetsCount},p(u.facets[m],a.facets_stats,t))}})),this.hierarchicalFacets=n(this.hierarchicalFacets),h.forEach((function(r){var n=t[g],s=n&&n.facets?n.facets:{},h=e.getHierarchicalFacetByName(r);Object.keys(s).forEach((function(t){var r,f=s[t];if(h){r=c(e.hierarchicalFacets,(function(e){return e.name===h.name}));var m=c(u.hierarchicalFacets[r],(function(e){return e.attribute===t}));if(-1===m)return;u.hierarchicalFacets[r][m].data=o({},u.hierarchicalFacets[r][m].data,f)}else{r=v[t];var d=a.facets&&a.facets[t]||{};u.disjunctiveFacets[r]={name:t,data:i({},f,d),exhaustive:n.exhaustiveFacetsCount},p(u.disjunctiveFacets[r],n.facets_stats,t),e.disjunctiveFacetsRefinements[t]&&e.disjunctiveFacetsRefinements[t].forEach((function(n){!u.disjunctiveFacets[r].data[n]&&e.disjunctiveFacetsRefinements[t].indexOf(l(n))>-1&&(u.disjunctiveFacets[r].data[n]=0)}))}})),g++})),e.getRefinedHierarchicalFacets().forEach((function(r){var n=e.getHierarchicalFacetByName(r),a=e._getHierarchicalFacetSeparator(n),s=e.getHierarchicalRefinement(r);0===s.length||s[0].split(a).length<2||t.slice(g).forEach((function(t){var r=t&&t.facets?t.facets:{};Object.keys(r).forEach((function(t){var o=r[t],h=c(e.hierarchicalFacets,(function(e){return e.name===n.name})),f=c(u.hierarchicalFacets[h],(function(e){return e.attribute===t}));if(-1!==f){var l={};if(s.length>0){var m=s[0].split(a)[0];l[m]=u.hierarchicalFacets[h][f].data[m]}u.hierarchicalFacets[h][f].data=i(l,o,u.hierarchicalFacets[h][f].data)}})),g++}))})),Object.keys(e.facetsExcludes).forEach((function(t){var 
r=e.facetsExcludes[t],n=f[t];u.facets[n]={name:t,data:y[t],exhaustive:a.exhaustiveFacetsCount},r.forEach((function(e){u.facets[n]=u.facets[n]||{name:t},u.facets[n].data=u.facets[n].data||{},u.facets[n].data[e]=0}))})),this.hierarchicalFacets=this.hierarchicalFacets.map(m(e)),this.facets=n(this.facets),this.disjunctiveFacets=n(this.disjunctiveFacets),this._state=e}function g(e,t){function r(e){return e.name===t}if(e._state.isConjunctiveFacet(t)){var n=s(e.facets,r);return n?Object.keys(n.data).map((function(r){var i=f(r);return{name:r,escapedValue:i,count:n.data[r],isRefined:e._state.isFacetRefined(t,i),isExcluded:e._state.isExcludeRefined(t,r)}})):[]}if(e._state.isDisjunctiveFacet(t)){var i=s(e.disjunctiveFacets,r);return i?Object.keys(i.data).map((function(r){var n=f(r);return{name:r,escapedValue:n,count:i.data[r],isRefined:e._state.isDisjunctiveFacetRefined(t,n)}})):[]}if(e._state.isHierarchicalFacet(t)){var a=s(e.hierarchicalFacets,r);if(!a)return a;var c=e._state.getHierarchicalFacetByName(t),u=e._state._getHierarchicalFacetSeparator(c),o=l(e._state.getHierarchicalRefinement(t)[0]||"");0===o.indexOf(c.rootPath)&&(o=o.replace(c.rootPath+u,""));var h=o.split(u);return h.unshift(t),y(a,h,0),a}}function y(e,t,r){e.isRefined=e.name===t[r],e.data&&e.data.forEach((function(e){y(e,t,r+1)}))}function R(e,t,r,n){if(n=n||0,Array.isArray(t))return e(t,r[n]);if(!t.data||0===t.data.length)return t;var a=t.data.map((function(t){return R(e,t,r,n+1)})),s=e(a,r[n]);return i({data:s},t)}function F(e,t){var r=s(e,(function(e){return e.name===t}));return r&&r.stats}function b(e,t,r,n,i){var a=s(i,(function(e){return e.name===r})),c=a&&a.data&&a.data[n]?a.data[n]:0,u=a&&a.exhaustive||!1;return{type:t,attributeName:r,name:n,count:c,exhaustive:u}}v.prototype.getFacetByName=function(e){function t(t){return t.name===e}return 
s(this.facets,t)||s(this.disjunctiveFacets,t)||s(this.hierarchicalFacets,t)},v.DEFAULT_SORT=["isRefined:desc","count:desc","name:asc"],v.prototype.getFacetValues=function(e,t){var r=g(this,e);if(r){var n,a=i({},t,{sortBy:v.DEFAULT_SORT,facetOrdering:!(t&&t.sortBy)}),s=this;if(Array.isArray(r))n=[e];else n=s._state.getHierarchicalFacetByName(r.name).attributes;return R((function(e,t){if(a.facetOrdering){var r=function(e,t){return e.renderingContent&&e.renderingContent.facetOrdering&&e.renderingContent.facetOrdering.values&&e.renderingContent.facetOrdering.values[t]}(s,t);if(r)return function(e,t){var r=[],n=[],i=(t.order||[]).reduce((function(e,t,r){return e[t]=r,e}),{});e.forEach((function(e){var t=e.path||e.name;void 0!==i[t]?r[i[t]]=e:n.push(e)})),r=r.filter((function(e){return e}));var a,s=t.sortRemainingBy;return"hidden"===s?r:(a="alpha"===s?[["path","name"],["asc","asc"]]:[["count"],["desc"]],r.concat(h(n,a[0],a[1])))}(e,r)}if(Array.isArray(a.sortBy)){var n=u(a.sortBy,v.DEFAULT_SORT);return h(e,n[0],n[1])}if("function"==typeof a.sortBy)return function(e,t){return t.sort(e)}(a.sortBy,e);throw new Error("options.sortBy is optional but if defined it must be either an array of string (predicates) or a sorting function")}),r,n)}},v.prototype.getFacetStats=function(e){return this._state.isConjunctiveFacet(e)?F(this.facets,e):this._state.isDisjunctiveFacet(e)?F(this.disjunctiveFacets,e):void 0},v.prototype.getRefinements=function(){var e=this._state,t=this,r=[];return 
Object.keys(e.facetsRefinements).forEach((function(n){e.facetsRefinements[n].forEach((function(i){r.push(b(e,"facet",n,i,t.facets))}))})),Object.keys(e.facetsExcludes).forEach((function(n){e.facetsExcludes[n].forEach((function(i){r.push(b(e,"exclude",n,i,t.facets))}))})),Object.keys(e.disjunctiveFacetsRefinements).forEach((function(n){e.disjunctiveFacetsRefinements[n].forEach((function(i){r.push(b(e,"disjunctive",n,i,t.disjunctiveFacets))}))})),Object.keys(e.hierarchicalFacetsRefinements).forEach((function(n){e.hierarchicalFacetsRefinements[n].forEach((function(i){r.push(function(e,t,r,n){var i=e.getHierarchicalFacetByName(t),a=e._getHierarchicalFacetSeparator(i),c=r.split(a),u=s(n,(function(e){return e.name===t})),o=c.reduce((function(e,t){var r=e&&s(e.data,(function(e){return e.name===t}));return void 0!==r?r:e}),u),h=o&&o.count||0,f=o&&o.exhaustive||!1,l=o&&o.path||"";return{type:"hierarchical",attributeName:t,name:l,count:h,exhaustive:f}}(e,n,i,t.hierarchicalFacets))}))})),Object.keys(e.numericRefinements).forEach((function(t){var n=e.numericRefinements[t];Object.keys(n).forEach((function(e){n[e].forEach((function(n){r.push({type:"numeric",attributeName:t,name:n,numericValue:n,operator:e})}))}))})),e.tagRefinements.forEach((function(e){r.push({type:"tag",attributeName:"_tags",name:e})})),r},e.exports=v},9374:(e,t,r)=>{"use strict";var n=r(7331),i=r(8078),a=r(4039).escapeFacetValue,s=r(4853),c=r(185),u=r(116),o=r(9803),h=r(6394),f=r(7775),l=r(3076),m=r(4336);function d(e,t,r){"function"==typeof e.addAlgoliaAgent&&e.addAlgoliaAgent("JS Helper ("+m+")"),this.setClient(e);var n=r||{};n.index=t,this.state=f.make(n),this.lastResults=null,this._queryId=0,this._lastQueryIdReceived=-1,this.derivedHelpers=[],this._currentNbQueries=0}function p(e){if(e<0)throw new Error("Page requested below 0.");return this._change({state:this.state.setPage(e),isPageReset:!1}),this}function v(){return this.state.page}s(d,n),d.prototype.search=function(){return 
this._search({onlyWithDerivedHelpers:!1}),this},d.prototype.searchOnlyWithDerivedHelpers=function(){return this._search({onlyWithDerivedHelpers:!0}),this},d.prototype.getQuery=function(){var e=this.state;return h._getHitsSearchParams(e)},d.prototype.searchOnce=function(e,t){var r=e?this.state.setQueryParameters(e):this.state,n=h._getQueries(r.index,r),i=this;if(this._currentNbQueries++,this.emit("searchOnce",{state:r}),!t)return this.client.search(n).then((function(e){return i._currentNbQueries--,0===i._currentNbQueries&&i.emit("searchQueueEmpty"),{content:new l(r,e.results),state:r,_originalResponse:e}}),(function(e){throw i._currentNbQueries--,0===i._currentNbQueries&&i.emit("searchQueueEmpty"),e}));this.client.search(n).then((function(e){i._currentNbQueries--,0===i._currentNbQueries&&i.emit("searchQueueEmpty"),t(null,new l(r,e.results),r)})).catch((function(e){i._currentNbQueries--,0===i._currentNbQueries&&i.emit("searchQueueEmpty"),t(e,null,r)}))},d.prototype.findAnswers=function(e){console.warn("[algoliasearch-helper] answers is no longer supported");var t=this.state,r=this.derivedHelpers[0];if(!r)return Promise.resolve([]);var n=r.getModifiedState(t),i=c({attributesForPrediction:e.attributesForPrediction,nbHits:e.nbHits},{params:o(h._getHitsSearchParams(n),["attributesToSnippet","hitsPerPage","restrictSearchableAttributes","snippetEllipsisText"])}),a="search for answers was called, but this client does not have a function client.initIndex(index).findAnswers";if("function"!=typeof this.client.initIndex)throw new Error(a);var s=this.client.initIndex(n.index);if("function"!=typeof s.findAnswers)throw new Error(a);return s.findAnswers(n.query,e.queryLanguages,i)},d.prototype.searchForFacetValues=function(e,t,r,n){var i="function"==typeof this.client.searchForFacetValues,s="function"==typeof this.client.initIndex;if(!i&&!s&&"function"!=typeof this.client.search)throw new Error("search for facet values (searchable) was called, but this client does not have a 
function client.searchForFacetValues or client.initIndex(index).searchForFacetValues");var c=this.state.setQueryParameters(n||{}),u=c.isDisjunctiveFacet(e),o=h.getSearchForFacetQuery(e,t,r,c);this._currentNbQueries++;var f,l=this;return i?f=this.client.searchForFacetValues([{indexName:c.index,params:o}]):s?f=this.client.initIndex(c.index).searchForFacetValues(o):(delete o.facetName,f=this.client.search([{type:"facet",facet:e,indexName:c.index,params:o}]).then((function(e){return e.results[0]}))),this.emit("searchForFacetValues",{state:c,facet:e,query:t}),f.then((function(t){return l._currentNbQueries--,0===l._currentNbQueries&&l.emit("searchQueueEmpty"),(t=Array.isArray(t)?t[0]:t).facetHits.forEach((function(t){t.escapedValue=a(t.value),t.isRefined=u?c.isDisjunctiveFacetRefined(e,t.escapedValue):c.isFacetRefined(e,t.escapedValue)})),t}),(function(e){throw l._currentNbQueries--,0===l._currentNbQueries&&l.emit("searchQueueEmpty"),e}))},d.prototype.setQuery=function(e){return this._change({state:this.state.resetPage().setQuery(e),isPageReset:!0}),this},d.prototype.clearRefinements=function(e){return this._change({state:this.state.resetPage().clearRefinements(e),isPageReset:!0}),this},d.prototype.clearTags=function(){return this._change({state:this.state.resetPage().clearTags(),isPageReset:!0}),this},d.prototype.addDisjunctiveFacetRefinement=function(e,t){return this._change({state:this.state.resetPage().addDisjunctiveFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.addDisjunctiveRefine=function(){return this.addDisjunctiveFacetRefinement.apply(this,arguments)},d.prototype.addHierarchicalFacetRefinement=function(e,t){return this._change({state:this.state.resetPage().addHierarchicalFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.addNumericRefinement=function(e,t,r){return this._change({state:this.state.resetPage().addNumericRefinement(e,t,r),isPageReset:!0}),this},d.prototype.addFacetRefinement=function(e,t){return 
this._change({state:this.state.resetPage().addFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.addRefine=function(){return this.addFacetRefinement.apply(this,arguments)},d.prototype.addFacetExclusion=function(e,t){return this._change({state:this.state.resetPage().addExcludeRefinement(e,t),isPageReset:!0}),this},d.prototype.addExclude=function(){return this.addFacetExclusion.apply(this,arguments)},d.prototype.addTag=function(e){return this._change({state:this.state.resetPage().addTagRefinement(e),isPageReset:!0}),this},d.prototype.removeNumericRefinement=function(e,t,r){return this._change({state:this.state.resetPage().removeNumericRefinement(e,t,r),isPageReset:!0}),this},d.prototype.removeDisjunctiveFacetRefinement=function(e,t){return this._change({state:this.state.resetPage().removeDisjunctiveFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.removeDisjunctiveRefine=function(){return this.removeDisjunctiveFacetRefinement.apply(this,arguments)},d.prototype.removeHierarchicalFacetRefinement=function(e){return this._change({state:this.state.resetPage().removeHierarchicalFacetRefinement(e),isPageReset:!0}),this},d.prototype.removeFacetRefinement=function(e,t){return this._change({state:this.state.resetPage().removeFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.removeRefine=function(){return this.removeFacetRefinement.apply(this,arguments)},d.prototype.removeFacetExclusion=function(e,t){return this._change({state:this.state.resetPage().removeExcludeRefinement(e,t),isPageReset:!0}),this},d.prototype.removeExclude=function(){return this.removeFacetExclusion.apply(this,arguments)},d.prototype.removeTag=function(e){return this._change({state:this.state.resetPage().removeTagRefinement(e),isPageReset:!0}),this},d.prototype.toggleFacetExclusion=function(e,t){return this._change({state:this.state.resetPage().toggleExcludeFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.toggleExclude=function(){return 
this.toggleFacetExclusion.apply(this,arguments)},d.prototype.toggleRefinement=function(e,t){return this.toggleFacetRefinement(e,t)},d.prototype.toggleFacetRefinement=function(e,t){return this._change({state:this.state.resetPage().toggleFacetRefinement(e,t),isPageReset:!0}),this},d.prototype.toggleRefine=function(){return this.toggleFacetRefinement.apply(this,arguments)},d.prototype.toggleTag=function(e){return this._change({state:this.state.resetPage().toggleTagRefinement(e),isPageReset:!0}),this},d.prototype.nextPage=function(){var e=this.state.page||0;return this.setPage(e+1)},d.prototype.previousPage=function(){var e=this.state.page||0;return this.setPage(e-1)},d.prototype.setCurrentPage=p,d.prototype.setPage=p,d.prototype.setIndex=function(e){return this._change({state:this.state.resetPage().setIndex(e),isPageReset:!0}),this},d.prototype.setQueryParameter=function(e,t){return this._change({state:this.state.resetPage().setQueryParameter(e,t),isPageReset:!0}),this},d.prototype.setState=function(e){return this._change({state:f.make(e),isPageReset:!1}),this},d.prototype.overrideStateWithoutTriggeringChangeEvent=function(e){return this.state=new f(e),this},d.prototype.hasRefinements=function(e){return!!u(this.state.getNumericRefinements(e))||(this.state.isConjunctiveFacet(e)?this.state.isFacetRefined(e):this.state.isDisjunctiveFacet(e)?this.state.isDisjunctiveFacetRefined(e):!!this.state.isHierarchicalFacet(e)&&this.state.isHierarchicalFacetRefined(e))},d.prototype.isExcluded=function(e,t){return this.state.isExcludeRefined(e,t)},d.prototype.isDisjunctiveRefined=function(e,t){return this.state.isDisjunctiveFacetRefined(e,t)},d.prototype.hasTag=function(e){return this.state.isTagRefined(e)},d.prototype.isTagRefined=function(){return this.hasTagRefinements.apply(this,arguments)},d.prototype.getIndex=function(){return this.state.index},d.prototype.getCurrentPage=v,d.prototype.getPage=v,d.prototype.getTags=function(){return 
this.state.tagRefinements},d.prototype.getRefinements=function(e){var t=[];if(this.state.isConjunctiveFacet(e))this.state.getConjunctiveRefinements(e).forEach((function(e){t.push({value:e,type:"conjunctive"})})),this.state.getExcludeRefinements(e).forEach((function(e){t.push({value:e,type:"exclude"})}));else if(this.state.isDisjunctiveFacet(e)){this.state.getDisjunctiveRefinements(e).forEach((function(e){t.push({value:e,type:"disjunctive"})}))}var r=this.state.getNumericRefinements(e);return Object.keys(r).forEach((function(e){var n=r[e];t.push({value:n,operator:e,type:"numeric"})})),t},d.prototype.getNumericRefinement=function(e,t){return this.state.getNumericRefinement(e,t)},d.prototype.getHierarchicalFacetBreadcrumb=function(e){return this.state.getHierarchicalFacetBreadcrumb(e)},d.prototype._search=function(e){var t=this.state,r=[],n=[];e.onlyWithDerivedHelpers||(n=h._getQueries(t.index,t),r.push({state:t,queriesCount:n.length,helper:this}),this.emit("search",{state:t,results:this.lastResults}));var i=this.derivedHelpers.map((function(e){var n=e.getModifiedState(t),i=n.index?h._getQueries(n.index,n):[];return r.push({state:n,queriesCount:i.length,helper:e}),e.emit("search",{state:n,results:e.lastResults}),i})),a=Array.prototype.concat.apply(n,i),s=this._queryId++;if(this._currentNbQueries++,!a.length)return Promise.resolve({results:[]}).then(this._dispatchAlgoliaResponse.bind(this,r,s));try{this.client.search(a).then(this._dispatchAlgoliaResponse.bind(this,r,s)).catch(this._dispatchAlgoliaError.bind(this,s))}catch(c){this.emit("error",{error:c})}},d.prototype._dispatchAlgoliaResponse=function(e,t,r){if(!(t0},d.prototype._change=function(e){var t=e.state,r=e.isPageReset;t!==this.state&&(this.state=t,this.emit("change",{state:this.state,results:this.lastResults,isPageReset:r}))},d.prototype.clearCache=function(){return this.client.clearCache&&this.client.clearCache(),this},d.prototype.setClient=function(e){return this.client===e||("function"==typeof 
e.addAlgoliaAgent&&e.addAlgoliaAgent("JS Helper ("+m+")"),this.client=e),this},d.prototype.getClient=function(){return this.client},d.prototype.derive=function(e){var t=new i(this,e);return this.derivedHelpers.push(t),t},d.prototype.detachDerivedHelper=function(e){var t=this.derivedHelpers.indexOf(e);if(-1===t)throw new Error("Derived helper already detached");this.derivedHelpers.splice(t,1)},d.prototype.hasPendingRequests=function(){return this._currentNbQueries>0},e.exports=d},4587:e=>{"use strict";e.exports=function(e){return Array.isArray(e)?e.filter(Boolean):[]}},2344:e=>{"use strict";e.exports=function(){return Array.prototype.slice.call(arguments).reduceRight((function(e,t){return Object.keys(Object(t)).forEach((function(r){void 0!==t[r]&&(void 0!==e[r]&&delete e[r],e[r]=t[r])})),e}),{})}},4039:e=>{"use strict";e.exports={escapeFacetValue:function(e){return"string"!=typeof e?e:String(e).replace(/^-/,"\\-")},unescapeFacetValue:function(e){return"string"!=typeof e?e:e.replace(/^\\-/,"-")}}},7888:e=>{"use strict";e.exports=function(e,t){if(Array.isArray(e))for(var r=0;r{"use strict";e.exports=function(e,t){if(!Array.isArray(e))return-1;for(var r=0;r{"use strict";var n=r(7888);e.exports=function(e,t){var r=(t||[]).map((function(e){return e.split(":")}));return e.reduce((function(e,t){var i=t.split(":"),a=n(r,(function(e){return e[0]===i[0]}));return i.length>1||!a?(e[0].push(i[0]),e[1].push(i[1]),e):(e[0].push(a[0]),e[1].push(a[1]),e)}),[[],[]])}},4853:e=>{"use strict";e.exports=function(e,t){e.prototype=Object.create(t.prototype,{constructor:{value:e,enumerable:!1,writable:!0,configurable:!0}})}},2686:e=>{"use strict";e.exports=function(e,t){return e.filter((function(r,n){return t.indexOf(r)>-1&&e.indexOf(r)===n}))}},185:e=>{"use strict";function t(e){return"function"==typeof e||Array.isArray(e)||"[object Object]"===Object.prototype.toString.call(e)}function r(e,n){if(e===n)return e;for(var i in 
n)if(Object.prototype.hasOwnProperty.call(n,i)&&"__proto__"!==i&&"constructor"!==i){var a=n[i],s=e[i];void 0!==s&&void 0===a||(t(s)&&t(a)?e[i]=r(s,a):e[i]="object"==typeof(c=a)&&null!==c?r(Array.isArray(c)?[]:{},c):c)}var c;return e}e.exports=function(e){t(e)||(e={});for(var n=1,i=arguments.length;n{"use strict";e.exports=function(e){return e&&Object.keys(e).length>0}},9803:e=>{"use strict";e.exports=function(e,t){if(null===e)return{};var r,n,i={},a=Object.keys(e);for(n=0;n=0||(i[r]=e[r]);return i}},2148:e=>{"use strict";function t(e,t){if(e!==t){var r=void 0!==e,n=null===e,i=void 0!==t,a=null===t;if(!a&&e>t||n&&i||!r)return 1;if(!n&&e=n.length?a:"desc"===n[i]?-a:a}return e.index-r.index})),i.map((function(e){return e.value}))}},8023:e=>{"use strict";e.exports=function e(t){if("number"==typeof t)return t;if("string"==typeof t)return parseFloat(t);if(Array.isArray(t))return t.map(e);throw new Error("The value should be a number, a parsable string or an array of those.")}},6394:(e,t,r)=>{"use strict";var n=r(185);function i(e){return Object.keys(e).sort().reduce((function(t,r){return t[r]=e[r],t}),{})}var a={_getQueries:function(e,t){var r=[];return r.push({indexName:e,params:a._getHitsSearchParams(t)}),t.getRefinedDisjunctiveFacets().forEach((function(n){r.push({indexName:e,params:a._getDisjunctiveFacetSearchParams(t,n)})})),t.getRefinedHierarchicalFacets().forEach((function(n){var i=t.getHierarchicalFacetByName(n),s=t.getHierarchicalRefinement(n),c=t._getHierarchicalFacetSeparator(i);if(s.length>0&&s[0].split(c).length>1){var u=s[0].split(c).slice(0,-1).reduce((function(e,t,r){return e.concat({attribute:i.attributes[r],value:0===r?t:[e[e.length-1].value,t].join(c)})}),[]);u.forEach((function(n,s){var c=a._getDisjunctiveFacetSearchParams(t,n.attribute,0===s);function o(e){return i.attributes.some((function(t){return t===e.split(":")[0]}))}var h=(c.facetFilters||[]).reduce((function(e,t){if(Array.isArray(t)){var 
r=t.filter((function(e){return!o(e)}));r.length>0&&e.push(r)}return"string"!=typeof t||o(t)||e.push(t),e}),[]),f=u[s-1];c.facetFilters=s>0?h.concat(f.attribute+":"+f.value):h.length>0?h:void 0,r.push({indexName:e,params:c})}))}})),r},_getHitsSearchParams:function(e){var t=e.facets.concat(e.disjunctiveFacets).concat(a._getHitsHierarchicalFacetsAttributes(e)).sort(),r=a._getFacetFilters(e),s=a._getNumericFilters(e),c=a._getTagFilters(e),u={facets:t.indexOf("*")>-1?["*"]:t,tagFilters:c};return r.length>0&&(u.facetFilters=r),s.length>0&&(u.numericFilters=s),i(n({},e.getQueryParams(),u))},_getDisjunctiveFacetSearchParams:function(e,t,r){var s=a._getFacetFilters(e,t,r),c=a._getNumericFilters(e,t),u=a._getTagFilters(e),o={hitsPerPage:0,page:0,analytics:!1,clickAnalytics:!1};u.length>0&&(o.tagFilters=u);var h=e.getHierarchicalFacetByName(t);return o.facets=h?a._getDisjunctiveHierarchicalFacetAttribute(e,h,r):t,c.length>0&&(o.numericFilters=c),s.length>0&&(o.facetFilters=s),i(n({},e.getQueryParams(),o))},_getNumericFilters:function(e,t){if(e.numericFilters)return e.numericFilters;var r=[];return Object.keys(e.numericRefinements).forEach((function(n){var i=e.numericRefinements[n]||{};Object.keys(i).forEach((function(e){var a=i[e]||[];t!==n&&a.forEach((function(t){if(Array.isArray(t)){var i=t.map((function(t){return n+e+t}));r.push(i)}else r.push(n+e+t)}))}))})),r},_getTagFilters:function(e){return e.tagFilters?e.tagFilters:e.tagRefinements.join(",")},_getFacetFilters:function(e,t,r){var n=[],i=e.facetsRefinements||{};Object.keys(i).sort().forEach((function(e){(i[e]||[]).sort().forEach((function(t){n.push(e+":"+t)}))}));var a=e.facetsExcludes||{};Object.keys(a).sort().forEach((function(e){(a[e]||[]).sort().forEach((function(t){n.push(e+":-"+t)}))}));var s=e.disjunctiveFacetsRefinements||{};Object.keys(s).sort().forEach((function(e){var r=s[e]||[];if(e!==t&&r&&0!==r.length){var i=[];r.sort().forEach((function(t){i.push(e+":"+t)})),n.push(i)}}));var 
c=e.hierarchicalFacetsRefinements||{};return Object.keys(c).sort().forEach((function(i){var a=(c[i]||[])[0];if(void 0!==a){var s,u,o=e.getHierarchicalFacetByName(i),h=e._getHierarchicalFacetSeparator(o),f=e._getHierarchicalRootPath(o);if(t===i){if(-1===a.indexOf(h)||!f&&!0===r||f&&f.split(h).length===a.split(h).length)return;f?(u=f.split(h).length-1,a=f):(u=a.split(h).length-2,a=a.slice(0,a.lastIndexOf(h))),s=o.attributes[u]}else u=a.split(h).length-1,s=o.attributes[u];s&&n.push([s+":"+a])}})),n},_getHitsHierarchicalFacetsAttributes:function(e){return e.hierarchicalFacets.reduce((function(t,r){var n=e.getHierarchicalRefinement(r.name)[0];if(!n)return t.push(r.attributes[0]),t;var i=e._getHierarchicalFacetSeparator(r),a=n.split(i).length,s=r.attributes.slice(0,a+1);return t.concat(s)}),[])},_getDisjunctiveHierarchicalFacetAttribute:function(e,t,r){var n=e._getHierarchicalFacetSeparator(t);if(!0===r){var i=e._getHierarchicalRootPath(t),a=0;return i&&(a=i.split(n).length),[t.attributes[a]]}var s=(e.getHierarchicalRefinement(t.name)[0]||"").split(n).length-1;return t.attributes.slice(0,s+1)},getSearchForFacetQuery:function(e,t,r,s){var c=s.isDisjunctiveFacet(e)?s.clearRefinements(e):s,u={facetQuery:t,facetName:e};return"number"==typeof r&&(u.maxFacetHits=r),i(n({},a._getHitsSearchParams(c),u))}};e.exports=a},6801:e=>{"use strict";e.exports=function(e){return null!==e&&/^[a-zA-Z0-9_-]{1,64}$/.test(e)}},4336:e=>{"use strict";e.exports="3.15.0"},290:function(e){e.exports=function(){"use strict";function e(e,t,r){return t in e?Object.defineProperty(e,t,{value:r,enumerable:!0,configurable:!0,writable:!0}):e[t]=r,e}function t(e,t){var r=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),r.push.apply(r,n)}return r}function r(r){for(var n=1;n=0||(i[r]=e[r]);return i}(e,t);if(Object.getOwnPropertySymbols){var 
a=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,r)&&(i[r]=e[r])}return i}function i(e,t){return function(e){if(Array.isArray(e))return e}(e)||function(e,t){if(Symbol.iterator in Object(e)||"[object Arguments]"===Object.prototype.toString.call(e)){var r=[],n=!0,i=!1,a=void 0;try{for(var s,c=e[Symbol.iterator]();!(n=(s=c.next()).done)&&(r.push(s.value),!t||r.length!==t);n=!0);}catch(e){i=!0,a=e}finally{try{n||null==c.return||c.return()}finally{if(i)throw a}}return r}}(e,t)||function(){throw new TypeError("Invalid attempt to destructure non-iterable instance")}()}function a(e){return function(e){if(Array.isArray(e)){for(var t=0,r=new Array(e.length);t2&&void 0!==arguments[2]?arguments[2]:{miss:function(){return Promise.resolve()}};return Promise.resolve().then((function(){c();var t=JSON.stringify(e);return a()[t]})).then((function(e){return Promise.all([e?e.value:t(),void 0!==e])})).then((function(e){var t=i(e,2),n=t[0],a=t[1];return Promise.all([n,a||r.miss(n)])})).then((function(e){return i(e,1)[0]}))},set:function(e,t){return Promise.resolve().then((function(){var i=a();return i[JSON.stringify(e)]={timestamp:(new Date).getTime(),value:t},n().setItem(r,JSON.stringify(i)),t}))},delete:function(e){return Promise.resolve().then((function(){var t=a();delete t[JSON.stringify(e)],n().setItem(r,JSON.stringify(t))}))},clear:function(){return Promise.resolve().then((function(){n().removeItem(r)}))}}}function c(e){var t=a(e.caches),r=t.shift();return void 0===r?{get:function(e,t){var r=arguments.length>2&&void 0!==arguments[2]?arguments[2]:{miss:function(){return Promise.resolve()}};return t().then((function(e){return Promise.all([e,r.miss(e)])})).then((function(e){return i(e,1)[0]}))},set:function(e,t){return Promise.resolve(t)},delete:function(e){return Promise.resolve()},clear:function(){return Promise.resolve()}}:{get:function(e,n){var i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:{miss:function(){return 
Promise.resolve()}};return r.get(e,n,i).catch((function(){return c({caches:t}).get(e,n,i)}))},set:function(e,n){return r.set(e,n).catch((function(){return c({caches:t}).set(e,n)}))},delete:function(e){return r.delete(e).catch((function(){return c({caches:t}).delete(e)}))},clear:function(){return r.clear().catch((function(){return c({caches:t}).clear()}))}}}function u(){var e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:{serializable:!0},t={};return{get:function(r,n){var i=arguments.length>2&&void 0!==arguments[2]?arguments[2]:{miss:function(){return Promise.resolve()}},a=JSON.stringify(r);if(a in t)return Promise.resolve(e.serializable?JSON.parse(t[a]):t[a]);var s=n(),c=i&&i.miss||function(){return Promise.resolve()};return s.then((function(e){return c(e)})).then((function(){return s}))},set:function(r,n){return t[JSON.stringify(r)]=e.serializable?JSON.stringify(n):n,Promise.resolve(n)},delete:function(e){return delete t[JSON.stringify(e)],Promise.resolve()},clear:function(){return t={},Promise.resolve()}}}function o(e){for(var t=e.length-1;t>0;t--){var r=Math.floor(Math.random()*(t+1)),n=e[t];e[t]=e[r],e[r]=n}return e}function h(e,t){return t?(Object.keys(t).forEach((function(r){e[r]=t[r](e)})),e):e}function f(e){for(var t=arguments.length,r=new Array(t>1?t-1:0),n=1;n0?n:void 0,timeout:r.timeout||t,headers:r.headers||{},queryParameters:r.queryParameters||{},cacheable:r.cacheable}}var d={Read:1,Write:2,Any:3},p=1,v=2,g=3;function y(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:p;return r(r({},e),{},{status:t,lastUpdate:Date.now()})}function R(e){return"string"==typeof e?{protocol:"https",url:e,accept:d.Any}:{protocol:e.protocol||"https",url:e.url,accept:e.accept||d.Any}}var F="GET",b="POST";function P(e,t){return Promise.all(t.map((function(t){return e.get(t,(function(){return Promise.resolve(y(t))}))}))).then((function(e){var r=e.filter((function(e){return function(e){return 
e.status===p||Date.now()-e.lastUpdate>12e4}(e)})),n=e.filter((function(e){return function(e){return e.status===g&&Date.now()-e.lastUpdate<=12e4}(e)})),i=[].concat(a(r),a(n));return{getTimeout:function(e,t){return(0===n.length&&0===e?1:n.length+3+e)*t},statelessHosts:i.length>0?i.map((function(e){return R(e)})):t}}))}function j(e,t,n,i){var s=[],c=function(e,t){if(e.method!==F&&(void 0!==e.data||void 0!==t.data)){var n=Array.isArray(e.data)?e.data:r(r({},e.data),t.data);return JSON.stringify(n)}}(n,i),u=function(e,t){var n=r(r({},e.headers),t.headers),i={};return Object.keys(n).forEach((function(e){var t=n[e];i[e.toLowerCase()]=t})),i}(e,i),o=n.method,h=n.method!==F?{}:r(r({},n.data),i.data),f=r(r(r({"x-algolia-agent":e.userAgent.value},e.queryParameters),h),i.queryParameters),l=0,m=function t(r,a){var h=r.pop();if(void 0===h)throw{name:"RetryError",message:"Unreachable hosts - your application id may be incorrect. If the error persists, contact support@algolia.com.",transporterStackTrace:O(s)};var m={data:c,headers:u,method:o,url:E(h,n.path,f),connectTimeout:a(l,e.timeouts.connect),responseTimeout:a(l,i.timeout)},d=function(e){var t={request:m,response:e,host:h,triesLeft:r.length};return s.push(t),t},p={onSuccess:function(e){return function(e){try{return JSON.parse(e.content)}catch(t){throw function(e,t){return{name:"DeserializationError",message:e,response:t}}(t.message,e)}}(e)},onRetry:function(n){var i=d(n);return n.isTimedOut&&l++,Promise.all([e.logger.info("Retryable failure",w(i)),e.hostsCache.set(h,y(h,n.isTimedOut?g:v))]).then((function(){return t(r,a)}))},onFail:function(e){throw d(e),function(e,t){var r=e.content,n=e.status,i=r;try{i=JSON.parse(r).message}catch(e){}return function(e,t,r){return{name:"ApiError",message:e,status:t,transporterStackTrace:r}}(i,n,t)}(e,O(s))}};return e.requester.send(m).then((function(e){return function(e,t){return function(e){var t=e.status;return e.isTimedOut||function(e){var 
t=e.isTimedOut,r=e.status;return!t&&0==~~r}(e)||2!=~~(t/100)&&4!=~~(t/100)}(e)?t.onRetry(e):2==~~(e.status/100)?t.onSuccess(e):t.onFail(e)}(e,p)}))};return P(e.hostsCache,t).then((function(e){return m(a(e.statelessHosts).reverse(),e.getTimeout)}))}function _(e){var t={value:"Algolia for JavaScript (".concat(e,")"),add:function(e){var r="; ".concat(e.segment).concat(void 0!==e.version?" (".concat(e.version,")"):"");return-1===t.value.indexOf(r)&&(t.value="".concat(t.value).concat(r)),t}};return t}function E(e,t,r){var n=x(r),i="".concat(e.protocol,"://").concat(e.url,"/").concat("/"===t.charAt(0)?t.substr(1):t);return n.length&&(i+="?".concat(n)),i}function x(e){return Object.keys(e).map((function(t){return f("%s=%s",t,(r=e[t],"[object Object]"===Object.prototype.toString.call(r)||"[object Array]"===Object.prototype.toString.call(r)?JSON.stringify(e[t]):e[t]));var r})).join("&")}function O(e){return e.map((function(e){return w(e)}))}function w(e){var t=e.request.headers["x-algolia-api-key"]?{"x-algolia-api-key":"*****"}:{};return r(r({},e),{},{request:r(r({},e.request),{},{headers:r(r({},e.request.headers),t)})})}var N=function(e){var t=e.appId,n=function(e,t,r){var n={"x-algolia-api-key":r,"x-algolia-application-id":t};return{headers:function(){return e===l.WithinHeaders?n:{}},queryParameters:function(){return e===l.WithinQueryParameters?n:{}}}}(void 0!==e.authMode?e.authMode:l.WithinHeaders,t,e.apiKey),a=function(e){var t=e.hostsCache,r=e.logger,n=e.requester,a=e.requestsCache,s=e.responsesCache,c=e.timeouts,u=e.userAgent,o=e.hosts,h=e.queryParameters,f={hostsCache:t,logger:r,requester:n,requestsCache:a,responsesCache:s,timeouts:c,userAgent:u,headers:e.headers,queryParameters:h,hosts:o.map((function(e){return R(e)})),read:function(e,t){var r=m(t,f.timeouts.read),n=function(){return j(f,f.hosts.filter((function(e){return 0!=(e.accept&d.Read)})),e,r)};if(!0!==(void 0!==r.cacheable?r.cacheable:e.cacheable))return n();var 
a={request:e,mappedRequestOptions:r,transporter:{queryParameters:f.queryParameters,headers:f.headers}};return f.responsesCache.get(a,(function(){return f.requestsCache.get(a,(function(){return f.requestsCache.set(a,n()).then((function(e){return Promise.all([f.requestsCache.delete(a),e])}),(function(e){return Promise.all([f.requestsCache.delete(a),Promise.reject(e)])})).then((function(e){var t=i(e,2);return t[0],t[1]}))}))}),{miss:function(e){return f.responsesCache.set(a,e)}})},write:function(e,t){return j(f,f.hosts.filter((function(e){return 0!=(e.accept&d.Write)})),e,m(t,f.timeouts.write))}};return f}(r(r({hosts:[{url:"".concat(t,"-dsn.algolia.net"),accept:d.Read},{url:"".concat(t,".algolia.net"),accept:d.Write}].concat(o([{url:"".concat(t,"-1.algolianet.com")},{url:"".concat(t,"-2.algolianet.com")},{url:"".concat(t,"-3.algolianet.com")}]))},e),{},{headers:r(r(r({},n.headers()),{"content-type":"application/x-www-form-urlencoded"}),e.headers),queryParameters:r(r({},n.queryParameters()),e.queryParameters)}));return h({transporter:a,appId:t,addAlgoliaAgent:function(e,t){a.userAgent.add({segment:e,version:t})},clearCache:function(){return Promise.all([a.requestsCache.clear(),a.responsesCache.clear()]).then((function(){}))}},e.methods)},A=function(e){return function(t,r){return t.method===F?e.transporter.read(t,r):e.transporter.write(t,r)}},H=function(e){return function(t){var r=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};return h({transporter:e.transporter,appId:e.appId,indexName:t},r.methods)}},S=function(e){return function(t,n){var i=t.map((function(e){return r(r({},e),{},{params:x(e.params||{})})}));return e.transporter.read({method:b,path:"1/indexes/*/queries",data:{requests:i},cacheable:!0},n)}},T=function(e){return function(t,i){return Promise.all(t.map((function(t){var a=t.params,s=a.facetName,c=a.facetQuery,u=n(a,["facetName","facetQuery"]);return 
H(e)(t.indexName,{methods:{searchForFacetValues:k}}).searchForFacetValues(s,c,r(r({},i),u))})))}},Q=function(e){return function(t,r,n){return e.transporter.read({method:b,path:f("1/answers/%s/prediction",e.indexName),data:{query:t,queryLanguages:r},cacheable:!0},n)}},C=function(e){return function(t,r){return e.transporter.read({method:b,path:f("1/indexes/%s/query",e.indexName),data:{query:t},cacheable:!0},r)}},k=function(e){return function(t,r,n){return e.transporter.read({method:b,path:f("1/indexes/%s/facets/%s/query",e.indexName,t),data:{facetQuery:r},cacheable:!0},n)}},I=1,D=2,q=3;function L(e,t,n){var i,a={appId:e,apiKey:t,timeouts:{connect:1,read:2,write:30},requester:{send:function(e){return new Promise((function(t){var r=new XMLHttpRequest;r.open(e.method,e.url,!0),Object.keys(e.headers).forEach((function(t){return r.setRequestHeader(t,e.headers[t])}));var n,i=function(e,n){return setTimeout((function(){r.abort(),t({status:0,content:n,isTimedOut:!0})}),1e3*e)},a=i(e.connectTimeout,"Connection timeout");r.onreadystatechange=function(){r.readyState>r.OPENED&&void 0===n&&(clearTimeout(a),n=i(e.responseTimeout,"Socket timeout"))},r.onerror=function(){0===r.status&&(clearTimeout(a),clearTimeout(n),t({content:r.responseText||"Network request failed",status:r.status,isTimedOut:!1}))},r.onload=function(){clearTimeout(a),clearTimeout(n),t({content:r.responseText,status:r.status,isTimedOut:!1})},r.send(e.data)}))}},logger:(i=q,{debug:function(e,t){return I>=i&&console.debug(e,t),Promise.resolve()},info:function(e,t){return D>=i&&console.info(e,t),Promise.resolve()},error:function(e,t){return console.error(e,t),Promise.resolve()}}),responsesCache:u(),requestsCache:u({serializable:!1}),hostsCache:c({caches:[s({key:"".concat("4.20.0","-").concat(e)}),u()]}),userAgent:_("4.20.0").add({segment:"Browser",version:"lite"}),authMode:l.WithinQueryParameters};return 
N(r(r(r({},a),n),{},{methods:{search:S,searchForFacetValues:T,multipleQueries:S,multipleSearchForFacetValues:T,customRequest:A,initIndex:function(e){return function(t){return H(e)(t,{methods:{search:C,searchForFacetValues:k,findAnswers:Q}})}}}}))}return L.version="4.20.0",L}()},9172:(e,t,r)=>{"use strict";r.r(t),r.d(t,{default:()=>F});var n=r(7294),i=r(290),a=r.n(i),s=r(8131),c=r.n(s),u=r(6010),o=r(2859),h=r(9960),f=r(412),l=r(3810),m=r(2263),d=r(907),p=r(9565),v=r(8882),g=r(5999);const y={searchQueryInput:"searchQueryInput_dLdO",searchVersionInput:"searchVersionInput_oJeg",searchResultsColumn:"searchResultsColumn_V1kT",algoliaLogo:"algoliaLogo_ieE9",algoliaLogoPathFill:"algoliaLogoPathFill_NLBU",searchResultItem:"searchResultItem_f0c5",searchResultItemHeading:"searchResultItemHeading_59Ih",searchResultItemPath:"searchResultItemPath_utd2",searchResultItemSummary:"searchResultItemSummary_EzNh",searchQueryColumn:"searchQueryColumn_qeTZ",searchVersionColumn:"searchVersionColumn_2Kfj",searchLogoColumn:"searchLogoColumn_8GYL",loadingSpinner:"loadingSpinner_CN74",loadingspin:"loadingspin_ANjV",loader:"loader_-Se+"};function R(e){let{docsSearchVersionsHelpers:t}=e;const r=Object.entries(t.allDocsData).filter((e=>{let[,t]=e;return t.versions.length>1}));return n.createElement("div",{className:(0,u.Z)("col","col--3","padding-left--none",y.searchVersionColumn)},r.map((e=>{let[i,a]=e;const s=r.length>1?`${i}: `:"";return n.createElement("select",{key:i,onChange:e=>t.setSearchVersion(i,e.target.value),defaultValue:t.searchVersions[i],className:y.searchVersionInput},a.versions.map(((e,t)=>n.createElement("option",{key:t,label:`${s}${e.label}`,value:e.name}))))})))}const F=function(){const{siteConfig:{themeConfig:{algolia:{appId:e,apiKey:t,indexName:r,externalUrlRegex:i}}},i18n:{currentLocale:s}}=(0,m.Z)(),F=function(){const{selectMessage:e}=(0,l.c2)();return t=>e(t,(0,g.I)({id:"theme.SearchPage.documentsFound.plurals",description:'Pluralized label for "{count} documents found". 
Use as much plural forms (separated by "|") as your language support (see https://www.unicode.org/cldr/cldr-aux/charts/34/supplemental/language_plural_rules.html)',message:"One document found|{count} documents found"},{count:t}))}(),b=function(){const e=(0,d._r)(),[t,r]=(0,n.useState)((()=>Object.entries(e).reduce(((e,t)=>{let[r,n]=t;return{...e,[r]:n.versions[0].name}}),{}))),i=Object.values(e).some((e=>e.versions.length>1));return{allDocsData:e,versioningEnabled:i,searchVersions:t,setSearchVersion:(e,t)=>r((r=>({...r,[e]:t})))}}(),{searchQuery:P,setSearchQuery:j}=(0,p.Z)(),_={items:[],query:null,totalResults:null,totalPages:null,lastPage:null,hasMore:null,loading:null},[E,x]=(0,n.useReducer)(((e,t)=>{switch(t.type){case"reset":return _;case"loading":return{...e,loading:!0};case"update":return P!==t.value.query?e:{...t.value,items:0===t.value.lastPage?t.value.items:e.items.concat(t.value.items)};case"advance":{const t=e.totalPages>e.lastPage+1;return{...e,lastPage:t?e.lastPage+1:e.lastPage,hasMore:t}}default:return e}}),_),O=a()(e,t),w=c()(O,r,{hitsPerPage:15,advancedSyntax:!0,disjunctiveFacets:["language","docusaurus_tag"]});w.on("result",(e=>{let{results:{query:t,hits:r,page:n,nbHits:a,nbPages:s}}=e;if(""===t||!(r instanceof Array))return void x({type:"reset"});const c=e=>e.replace(/algolia-docsearch-suggestion--highlight/g,"search-result-match"),u=r.map((e=>{let{url:t,_highlightResult:{hierarchy:r},_snippetResult:n={}}=e;const a=new URL(t),s=Object.keys(r).map((e=>c(r[e].value)));return{title:s.pop(),url:(0,l.Fx)(i,a.href)?a.href:a.pathname+a.hash,summary:n.content?`${c(n.content.value)}...`:"",breadcrumbs:s}}));x({type:"update",value:{items:u,query:t,totalResults:a,totalPages:s,lastPage:n,hasMore:s>n+1,loading:!1}})}));const[N,A]=(0,n.useState)(null),H=(0,n.useRef)(0),S=(0,n.useRef)(f.Z.canUseDOM&&new 
IntersectionObserver((e=>{const{isIntersecting:t,boundingClientRect:{y:r}}=e[0];t&&H.current>r&&x({type:"advance"}),H.current=r}),{threshold:1})),T=()=>P?(0,g.I)({id:"theme.SearchPage.existingResultsTitle",message:'Search results for "{query}"',description:"The search page title for non-empty query"},{query:P}):(0,g.I)({id:"theme.SearchPage.emptyResultsTitle",message:"Search the documentation",description:"The search page title for empty query"}),Q=(0,l.ed)((function(e){void 0===e&&(e=0),w.addDisjunctiveFacetRefinement("docusaurus_tag","default"),w.addDisjunctiveFacetRefinement("language",s),Object.entries(b.searchVersions).forEach((e=>{let[t,r]=e;w.addDisjunctiveFacetRefinement("docusaurus_tag",`docs-${t}-${r}`)})),w.setQuery(P).setPage(e).search()}));return(0,n.useEffect)((()=>{if(!N)return;const e=S.current;return e?(e.observe(N),()=>e.unobserve(N)):()=>!0}),[N]),(0,n.useEffect)((()=>{x({type:"reset"}),P&&(x({type:"loading"}),setTimeout((()=>{Q()}),300))}),[P,b.searchVersions,Q]),(0,n.useEffect)((()=>{E.lastPage&&0!==E.lastPage&&Q(E.lastPage)}),[Q,E.lastPage]),n.createElement(v.Z,{wrapperClassName:"search-page-wrapper"},n.createElement(o.Z,null,n.createElement("title",null,(0,l.pe)(T())),n.createElement("meta",{property:"robots",content:"noindex, follow"})),n.createElement("div",{className:"container margin-vert--lg"},n.createElement("h1",null,T()),n.createElement("form",{className:"row",onSubmit:e=>e.preventDefault()},n.createElement("div",{className:(0,u.Z)("col",y.searchQueryColumn,{"col--9":b.versioningEnabled,"col--12":!b.versioningEnabled})},n.createElement("input",{type:"search",name:"q",className:y.searchQueryInput,placeholder:(0,g.I)({id:"theme.SearchPage.inputPlaceholder",message:"Type your search here",description:"The placeholder for search page input"}),"aria-label":(0,g.I)({id:"theme.SearchPage.inputLabel",message:"Search",description:"The ARIA label for search page 
input"}),onChange:e=>j(e.target.value),value:P,autoComplete:"off",autoFocus:!0})),b.versioningEnabled&&n.createElement(R,{docsSearchVersionsHelpers:b})),n.createElement("div",{className:"row"},n.createElement("div",{className:(0,u.Z)("col","col--8",y.searchResultsColumn)},!!E.totalResults&&F(E.totalResults)),n.createElement("div",{className:(0,u.Z)("col","col--4","text--right",y.searchLogoColumn)},n.createElement("a",{target:"_blank",rel:"noopener noreferrer",href:"https://www.algolia.com/","aria-label":(0,g.I)({id:"theme.SearchPage.algoliaLabel",message:"Search by Algolia",description:"The ARIA label for Algolia mention"})},n.createElement("svg",{viewBox:"0 0 168 24",className:y.algoliaLogo},n.createElement("g",{fill:"none"},n.createElement("path",{className:y.algoliaLogoPathFill,d:"M120.925 18.804c-4.386.02-4.386-3.54-4.386-4.106l-.007-13.336 2.675-.424v13.254c0 .322 0 2.358 1.718 2.364v2.248zm-10.846-2.18c.821 0 1.43-.047 1.855-.129v-2.719a6.334 6.334 0 0 0-1.574-.199 5.7 5.7 0 0 0-.897.069 2.699 2.699 0 0 0-.814.24c-.24.116-.439.28-.582.491-.15.212-.219.335-.219.656 0 .628.219.991.616 1.23s.938.362 1.615.362zm-.233-9.7c.883 0 1.629.109 2.231.328.602.218 1.088.525 1.444.915.363.396.609.922.76 1.483.157.56.232 1.175.232 1.85v6.874a32.5 32.5 0 0 1-1.868.314c-.834.123-1.772.185-2.813.185-.69 0-1.327-.069-1.895-.198a4.001 4.001 0 0 1-1.471-.636 3.085 3.085 0 0 1-.951-1.134c-.226-.465-.343-1.12-.343-1.803 0-.656.13-1.073.384-1.525a3.24 3.24 0 0 1 1.047-1.106c.445-.287.95-.492 1.532-.615a8.8 8.8 0 0 1 1.82-.185 8.404 8.404 0 0 1 1.972.24v-.438c0-.307-.035-.6-.11-.874a1.88 1.88 0 0 0-.384-.73 1.784 1.784 0 0 0-.724-.493 3.164 3.164 0 0 0-1.143-.205c-.616 0-1.177.075-1.69.164a7.735 7.735 0 0 0-1.26.307l-.321-2.192c.335-.117.834-.233 1.478-.349a10.98 10.98 0 0 1 2.073-.178zm52.842 9.626c.822 0 1.43-.048 1.854-.13V13.7a6.347 6.347 0 0 0-1.574-.199c-.294 0-.595.021-.896.069a2.7 2.7 0 0 0-.814.24 1.46 1.46 0 0 0-.582.491c-.15.212-.218.335-.218.656 0 .628.218.991.615 
1.23.404.245.938.362 1.615.362zm-.226-9.694c.883 0 1.629.108 2.231.327.602.219 1.088.526 1.444.915.355.39.609.923.759 1.483a6.8 6.8 0 0 1 .233 1.852v6.873c-.41.088-1.034.19-1.868.314-.834.123-1.772.184-2.813.184-.69 0-1.327-.068-1.895-.198a4.001 4.001 0 0 1-1.471-.635 3.085 3.085 0 0 1-.951-1.134c-.226-.465-.343-1.12-.343-1.804 0-.656.13-1.073.384-1.524.26-.45.608-.82 1.047-1.107.445-.286.95-.491 1.532-.614a8.803 8.803 0 0 1 2.751-.13c.329.034.671.096 1.04.185v-.437a3.3 3.3 0 0 0-.109-.875 1.873 1.873 0 0 0-.384-.731 1.784 1.784 0 0 0-.724-.492 3.165 3.165 0 0 0-1.143-.205c-.616 0-1.177.075-1.69.164a7.75 7.75 0 0 0-1.26.307l-.321-2.193c.335-.116.834-.232 1.478-.348a11.633 11.633 0 0 1 2.073-.177zm-8.034-1.271a1.626 1.626 0 0 1-1.628-1.62c0-.895.725-1.62 1.628-1.62.904 0 1.63.725 1.63 1.62 0 .895-.733 1.62-1.63 1.62zm1.348 13.22h-2.689V7.27l2.69-.423v11.956zm-4.714 0c-4.386.02-4.386-3.54-4.386-4.107l-.008-13.336 2.676-.424v13.254c0 .322 0 2.358 1.718 2.364v2.248zm-8.698-5.903c0-1.156-.253-2.119-.746-2.788-.493-.677-1.183-1.01-2.067-1.01-.882 0-1.574.333-2.065 1.01-.493.676-.733 1.632-.733 2.788 0 1.168.246 1.953.74 2.63.492.683 1.183 1.018 2.066 1.018.882 0 1.574-.342 2.067-1.019.492-.683.738-1.46.738-2.63zm2.737-.007c0 .902-.13 1.584-.397 2.33a5.52 5.52 0 0 1-1.128 1.906 4.986 4.986 0 0 1-1.752 1.223c-.685.286-1.739.45-2.265.45-.528-.006-1.574-.157-2.252-.45a5.096 5.096 0 0 1-1.744-1.223c-.487-.527-.863-1.162-1.137-1.906a6.345 6.345 0 0 1-.41-2.33c0-.902.123-1.77.397-2.508a5.554 5.554 0 0 1 1.15-1.892 5.133 5.133 0 0 1 1.75-1.216c.679-.287 1.425-.423 2.232-.423.808 0 1.553.142 2.237.423a4.88 4.88 0 0 1 1.753 1.216 5.644 5.644 0 0 1 1.135 1.892c.287.738.431 1.606.431 2.508zm-20.138 0c0 1.12.246 2.363.738 2.882.493.52 1.13.78 1.91.78.424 0 .828-.062 1.204-.178.377-.116.677-.253.917-.417V9.33a10.476 10.476 0 0 0-1.766-.226c-.971-.028-1.71.37-2.23 1.004-.513.636-.773 1.75-.773 2.788zm7.438 5.274c0 1.824-.466 3.156-1.404 4.004-.936.846-2.367 1.27-4.296 1.27-.705 
0-2.17-.137-3.34-.396l.431-2.118c.98.205 2.272.26 2.95.26 1.074 0 1.84-.219 2.299-.656.459-.437.684-1.086.684-1.948v-.437a8.07 8.07 0 0 1-1.047.397c-.43.13-.93.198-1.492.198-.739 0-1.41-.116-2.018-.349a4.206 4.206 0 0 1-1.567-1.025c-.431-.45-.774-1.017-1.013-1.694-.24-.677-.363-1.885-.363-2.773 0-.834.13-1.88.384-2.577.26-.696.629-1.298 1.129-1.796.493-.498 1.095-.881 1.8-1.162a6.605 6.605 0 0 1 2.428-.457c.87 0 1.67.109 2.45.24.78.129 1.444.265 1.985.415V18.17zM6.972 6.677v1.627c-.712-.446-1.52-.67-2.425-.67-.585 0-1.045.13-1.38.391a1.24 1.24 0 0 0-.502 1.03c0 .425.164.765.494 1.02.33.256.835.532 1.516.83.447.192.795.356 1.045.495.25.138.537.332.862.582.324.25.563.548.718.894.154.345.23.741.23 1.188 0 .947-.334 1.691-1.004 2.234-.67.542-1.537.814-2.601.814-1.18 0-2.16-.229-2.936-.686v-1.708c.84.628 1.814.942 2.92.942.585 0 1.048-.136 1.388-.407.34-.271.51-.646.51-1.125 0-.287-.1-.55-.302-.79-.203-.24-.42-.42-.655-.542-.234-.123-.585-.29-1.053-.503a61.27 61.27 0 0 1-.582-.271 13.67 13.67 0 0 1-.55-.287 4.275 4.275 0 0 1-.567-.351 6.92 6.92 0 0 1-.455-.4c-.18-.17-.31-.34-.39-.51-.08-.17-.155-.37-.224-.598a2.553 2.553 0 0 1-.104-.742c0-.915.333-1.638.998-2.17.664-.532 1.523-.798 2.576-.798.968 0 1.793.17 2.473.51zm7.468 5.696v-.287c-.022-.607-.187-1.088-.495-1.444-.309-.357-.75-.535-1.324-.535-.532 0-.99.194-1.373.583-.382.388-.622.949-.717 1.683h3.909zm1.005 2.792v1.404c-.596.34-1.383.51-2.362.51-1.255 0-2.255-.377-3-1.132-.744-.755-1.116-1.744-1.116-2.968 0-1.297.34-2.316 1.021-3.055.68-.74 1.548-1.11 2.6-1.11 1.033 0 1.852.323 2.458.966.606.644.91 1.572.91 2.784 0 .33-.033.676-.096 1.038h-5.314c.107.702.405 1.239.894 1.611.49.372 1.106.558 1.85.558.862 0 1.58-.202 2.155-.606zm6.605-1.77h-1.212c-.596 0-1.045.116-1.349.35-.303.234-.454.532-.454.894 0 .372.117.664.35.877.235.213.575.32 1.022.32.51 0 .912-.142 1.204-.424.293-.281.44-.651.44-1.108v-.91zm-4.068-2.554V9.325c.627-.361 1.457-.542 2.489-.542 2.116 0 3.175 1.026 3.175 3.08V17h-1.548v-.957c-.415.68-1.143 
1.02-2.186 1.02-.766 0-1.38-.22-1.843-.661-.462-.442-.694-1.003-.694-1.684 0-.776.293-1.38.878-1.81.585-.431 1.404-.647 2.457-.647h1.34V11.8c0-.554-.133-.971-.399-1.253-.266-.282-.707-.423-1.324-.423a4.07 4.07 0 0 0-2.345.718zm9.333-1.93v1.42c.394-1 1.101-1.5 2.123-1.5.148 0 .313.016.494.048v1.531a1.885 1.885 0 0 0-.75-.143c-.542 0-.989.24-1.34.718-.351.479-.527 1.048-.527 1.707V17h-1.563V8.91h1.563zm5.01 4.084c.022.82.272 1.492.75 2.019.479.526 1.15.79 2.01.79.639 0 1.235-.176 1.788-.527v1.404c-.521.319-1.186.479-1.995.479-1.265 0-2.276-.4-3.031-1.197-.755-.798-1.133-1.792-1.133-2.984 0-1.16.38-2.151 1.14-2.975.761-.825 1.79-1.237 3.088-1.237.702 0 1.346.149 1.93.447v1.436a3.242 3.242 0 0 0-1.77-.495c-.84 0-1.513.266-2.019.798-.505.532-.758 1.213-.758 2.042zM40.24 5.72v4.579c.458-1 1.293-1.5 2.505-1.5.787 0 1.42.245 1.899.734.479.49.718 1.17.718 2.042V17h-1.564v-5.106c0-.553-.14-.98-.422-1.284-.282-.303-.652-.455-1.11-.455-.531 0-1.002.202-1.411.606-.41.405-.615 1.022-.615 1.851V17h-1.563V5.72h1.563zm14.966 10.02c.596 0 1.096-.253 1.5-.758.404-.506.606-1.157.606-1.955 0-.915-.202-1.62-.606-2.114-.404-.495-.92-.742-1.548-.742-.553 0-1.05.224-1.491.67-.442.447-.662 1.133-.662 2.058 0 .958.212 1.67.638 2.138.425.469.946.703 1.563.703zM53.004 5.72v4.42c.574-.894 1.388-1.341 2.44-1.341 1.022 0 1.857.383 2.506 1.149.649.766.973 1.781.973 3.047 0 1.138-.309 2.109-.925 2.912-.617.803-1.463 1.205-2.537 1.205-1.075 0-1.894-.447-2.457-1.34V17h-1.58V5.72h1.58zm9.908 11.104l-3.223-7.913h1.739l1.005 2.632 1.26 3.415c.096-.32.48-1.458 1.15-3.415l.909-2.632h1.66l-2.92 7.866c-.777 2.074-1.963 3.11-3.559 3.11a2.92 2.92 0 0 1-.734-.079v-1.34c.17.042.351.064.543.064 1.032 0 1.755-.57 2.17-1.708z"}),n.createElement("path",{fill:"#5468FF",d:"M78.988.938h16.594a2.968 2.968 0 0 1 2.966 2.966V20.5a2.967 2.967 0 0 1-2.966 2.964H78.988a2.967 2.967 0 0 1-2.966-2.964V3.897A2.961 2.961 0 0 1 78.988.938z"}),n.createElement("path",{fill:"white",d:"M89.632 5.967v-.772a.978.978 0 0 
0-.978-.977h-2.28a.978.978 0 0 0-.978.977v.793c0 .088.082.15.171.13a7.127 7.127 0 0 1 1.984-.28c.65 0 1.295.088 1.917.259.082.02.164-.04.164-.13m-6.248 1.01l-.39-.389a.977.977 0 0 0-1.382 0l-.465.465a.973.973 0 0 0 0 1.38l.383.383c.062.061.15.047.205-.014.226-.307.472-.601.746-.874.281-.28.568-.526.883-.751.068-.042.075-.137.02-.2m4.16 2.453v3.341c0 .096.104.165.192.117l2.97-1.537c.068-.034.089-.117.055-.184a3.695 3.695 0 0 0-3.08-1.866c-.068 0-.136.054-.136.13m0 8.048a4.489 4.489 0 0 1-4.49-4.482 4.488 4.488 0 0 1 4.49-4.482 4.488 4.488 0 0 1 4.489 4.482 4.484 4.484 0 0 1-4.49 4.482m0-10.85a6.363 6.363 0 1 0 0 12.729 6.37 6.37 0 0 0 6.372-6.368 6.358 6.358 0 0 0-6.371-6.36"})))))),E.items.length>0?n.createElement("main",null,E.items.map(((e,t)=>{let{title:r,url:i,summary:a,breadcrumbs:s}=e;return n.createElement("article",{key:t,className:y.searchResultItem},n.createElement("h2",{className:y.searchResultItemHeading},n.createElement(h.Z,{to:i,dangerouslySetInnerHTML:{__html:r}})),s.length>0&&n.createElement("nav",{"aria-label":"breadcrumbs"},n.createElement("ul",{className:(0,u.Z)("breadcrumbs",y.searchResultItemPath)},s.map(((e,t)=>n.createElement("li",{key:t,className:"breadcrumbs__item",dangerouslySetInnerHTML:{__html:e}}))))),a&&n.createElement("p",{className:y.searchResultItemSummary,dangerouslySetInnerHTML:{__html:a}}))}))):[P&&!E.loading&&n.createElement("p",{key:"no-results"},n.createElement(g.Z,{id:"theme.SearchPage.noResultsText",description:"The paragraph for empty search result"},"No results were found")),!!E.loading&&n.createElement("div",{key:"spinner",className:y.loadingSpinner})],E.hasMore&&n.createElement("div",{className:y.loader,ref:A},n.createElement(g.Z,{id:"theme.SearchPage.fetchingNewResults",description:"The paragraph for fetching new search results"},"Fetching new results..."))))}}}]); \ No newline at end of file diff --git a/assets/js/45e4bd3d.99c63d5e.js.LICENSE.txt b/assets/js/d0cd84a1.f023e02d.js.LICENSE.txt similarity index 100% rename 
from assets/js/45e4bd3d.99c63d5e.js.LICENSE.txt rename to assets/js/d0cd84a1.f023e02d.js.LICENSE.txt diff --git a/assets/js/d27722cf.dbdf793b.js b/assets/js/d27722cf.dbdf793b.js deleted file mode 100644 index 6bcec891..00000000 --- a/assets/js/d27722cf.dbdf793b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5500],{3905:(M,L,t)=>{t.d(L,{Zo:()=>o,kt:()=>C});var i=t(67294);function j(M,L,t){return L in M?Object.defineProperty(M,L,{value:t,enumerable:!0,configurable:!0,writable:!0}):M[L]=t,M}function e(M,L){var t=Object.keys(M);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(M);L&&(i=i.filter((function(L){return Object.getOwnPropertyDescriptor(M,L).enumerable}))),t.push.apply(t,i)}return t}function u(M){for(var L=1;L=0||(j[t]=M[t]);return j}(M,L);if(Object.getOwnPropertySymbols){var e=Object.getOwnPropertySymbols(M);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(M,t)&&(j[t]=M[t])}return j}var a=i.createContext({}),n=function(M){var L=i.useContext(a),t=L;return M&&(t="function"==typeof M?M(L):u(u({},L),M)),t},o=function(M){var L=n(M.components);return i.createElement(a.Provider,{value:L},M.children)},s="mdxType",w={inlineCode:"code",wrapper:function(M){var L=M.children;return i.createElement(i.Fragment,{},L)}},y=i.forwardRef((function(M,L){var t=M.components,j=M.mdxType,e=M.originalType,a=M.parentName,o=N(M,["components","mdxType","originalType","parentName"]),s=n(t),y=j,C=s["".concat(a,".").concat(y)]||s[y]||w[y]||e;return t?i.createElement(C,u(u({ref:L},o),{},{components:t})):i.createElement(C,u({ref:L},o))}));function C(M,L){var t=arguments,j=L&&L.mdxType;if("string"==typeof M||j){var e=t.length,u=new Array(e);u[0]=y;var N={};for(var a in L)hasOwnProperty.call(L,a)&&(N[a]=L[a]);N.originalType=M,N[s]="string"==typeof M?M:j,u[1]=N;for(var n=2;n{t.d(L,{Z:()=>j});var i=t(67294);function 
j(M){let{className:L,name:t,children:j,githubUrl:e,twitterUrl:u}=M;return i.createElement("div",{className:L},i.createElement("div",{className:"card card--full-height"},i.createElement("div",{className:"card__header"},i.createElement("div",{className:"avatar avatar--vertical"},i.createElement("img",{className:"avatar__photo avatar__photo--xl",src:e+".png"}),i.createElement("div",{className:"avatar__intro"},i.createElement("h3",{className:"avatar__name"},t)))),i.createElement("div",{className:"card__body"},j),i.createElement("div",{className:"card__footer"},i.createElement("div",{className:"button-group button-group--block"},e&&i.createElement("a",{className:"button button--secondary",href:e},"GitHub"),u&&i.createElement("a",{className:"button button--secondary",href:u},"Twitter")))))}},43914:(M,L,t)=>{t.r(L),t.d(L,{TeamProfileCardCol:()=>o,contentTitle:()=>N,default:()=>y,frontMatter:()=>u,metadata:()=>a,toc:()=>n});var i=t(87462),j=(t(67294),t(3905)),e=t(63427);const u={id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},N=void 0,a={unversionedId:"introduction/introduction",id:"version-3.2.5/introduction/introduction",title:"Introduction",description:"Clinical-grade variant annotation",source:"@site/versioned_docs/version-3.2.5/introduction/introduction.mdx",sourceDirName:"introduction",slug:"/",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/introduction/introduction.mdx",tags:[],version:"3.2.5",frontMatter:{id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},sidebar:"version-3.2.5/docs",next:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/introduction/dependencies"}},n=[{value:"What does Nirvana 
annotate?",id:"what-does-nirvana-annotate",children:[],level:2},{value:"Licensing",id:"licensing",children:[{value:"Code",id:"code",children:[],level:3},{value:"Data",id:"data",children:[],level:3}],level:2},{value:"Nirvana Team",id:"nirvana-team",children:[{value:"Active Team",id:"active-team",children:[],level:3},{value:"Honorary Alumni",id:"honorary-alumni",children:[],level:3}],level:2}];function o(M){return(0,j.kt)(e.Z,(0,i.Z)({},M,{className:"col col--6 margin-bottom--lg",mdxType:"TeamProfileCard"}))}const s={toc:n,TeamProfileCardCol:o},w="wrapper";function y(M){let{components:L,...e}=M;return(0,j.kt)(w,(0,i.Z)({},s,e,{components:L,mdxType:"MDXLayout"}),(0,j.kt)("p",null,(0,j.kt)("img",{src:t(820).Z})),(0,j.kt)("p",null,"Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs (including CNVs). It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation."),(0,j.kt)("p",null,"The input to Nirvana are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Nirvana handles multiple alternate alleles and multiple samples with ease."),(0,j.kt)("p",null,"The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. 
Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily."),(0,j.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,j.kt)("div",{parentName:"div",className:"admonition-heading"},(0,j.kt)("h5",{parentName:"div"},(0,j.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,j.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,j.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Fun Fact")),(0,j.kt)("div",{parentName:"div",className:"admonition-content"},(0,j.kt)("p",{parentName:"div"},"Nirvana is a backronym for ",(0,j.kt)("strong",{parentName:"p"},"NI"),"mble and ",(0,j.kt)("strong",{parentName:"p"},"R"),"obust ",(0,j.kt)("strong",{parentName:"p"},"VA"),"riant a",(0,j.kt)("strong",{parentName:"p"},"N"),"not",(0,j.kt)("strong",{parentName:"p"},"A"),"tor"))),(0,j.kt)("h2",{id:"what-does-nirvana-annotate"},"What does Nirvana annotate?"),(0,j.kt)("p",null,"We use Sequence Ontology consequences to describe how each variant impacts a given transcript:"),(0,j.kt)("p",null,(0,j.kt)("img",{src:t(55579).Z})),(0,j.kt)("p",null,"In addition, we also use external data sources to provide additional context for each variant:"),(0,j.kt)("p",null,(0,j.kt)("img",{src:t(87979).Z})),(0,j.kt)("h2",{id:"licensing"},"Licensing"),(0,j.kt)("h3",{id:"code"},"Code"),(0,j.kt)("p",null,"Nirvana source code is provided under the 
",(0,j.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/blob/develop/LICENSE"},"GPLv3")," license. Nirvana includes several third party packages provided under other open source licenses, please see ",(0,j.kt)("a",{parentName:"p",href:"introduction/dependencies"},"Dependencies")," for additional details."),(0,j.kt)("h3",{id:"data"},"Data"),(0,j.kt)("p",null,"The data used by Nirvana is publicly available, however some data sources have special restrictions on use by non-academic entities."),(0,j.kt)("h2",{id:"nirvana-team"},"Nirvana Team"),(0,j.kt)("h3",{id:"active-team"},"Active Team"),(0,j.kt)("p",null,"The Nirvana team works on the core functionality, AWS annotation services, in addition to keeping the annotation data sources up-to-date."),(0,j.kt)("p",null,"Current members of the Nirvana team are listed in alphabetical order below."),(0,j.kt)("div",{className:"row"},(0,j.kt)(o,{name:"Haochen Li",githubUrl:"https://github.com/haochenl",mdxType:"TeamProfileCardCol"},"Active developer. Detail-oriented quick thinker that keeps cool even in the most stressful situations."),(0,j.kt)(o,{name:"Michael Str\xf6mberg",githubUrl:"https://github.com/MichaelStromberg",mdxType:"TeamProfileCardCol"},"Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it."),(0,j.kt)(o,{name:"Rajat Shuvro Roy",githubUrl:"https://github.com/rajatshuvro",mdxType:"TeamProfileCardCol"},"Lead developer. Loves to speed up things and make services available to all interested users.")),(0,j.kt)("h3",{id:"honorary-alumni"},"Honorary Alumni"),(0,j.kt)("p",null,"Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things."),(0,j.kt)("div",{className:"row"},(0,j.kt)(o,{name:"Julien Lajugie",githubUrl:"https://github.com/JulienLajugie",mdxType:"TeamProfileCardCol"},"Julien is a legend around these parts. 
When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place."),(0,j.kt)(o,{name:"Shuli Kang",githubUrl:"https://github.com/shulik7",mdxType:"TeamProfileCardCol"},"Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies."),(0,j.kt)(o,{name:"Yu Jiang",githubUrl:"https://github.com/yujiang02",mdxType:"TeamProfileCardCol"},"Biostatistics genius from Duke University before joining our team at Illumina. Now working as a Research Engineer at Facebook AI Research.")))}y.isMDXComponent=!0},820:(M,L,t)=>{t.d(L,{Z:()=>i});const i=""},87979:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/SupplementaryAnnotations-d43d3f1c837f9b80fab530432e0e4b1d.svg"},55579:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/TranscriptConsequences-60ca1c43a36dacf896fecdabf09ce02c.svg"}}]); \ No newline at end of file diff --git a/assets/js/d2bc5330.64169538.js b/assets/js/d2bc5330.64169538.js deleted file mode 100644 index 0692db1e..00000000 --- a/assets/js/d2bc5330.64169538.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8678],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof 
e?e(t):i(i({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,l=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),m=c(n),u=r,v=m["".concat(s,".").concat(u)]||m[u]||d[u]||l;return n?a.createElement(v,i(i({ref:t},p),{},{components:n})):a.createElement(v,i({ref:t},p))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var l=n.length,i=new Array(l);i[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:r,i[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const l={title:"Variant IDs"},i=void 0,o={unversionedId:"core-functionality/variant-ids",id:"version-3.14/core-functionality/variant-ids",title:"Variant IDs",description:"Overview",source:"@site/versioned_docs/version-3.14/core-functionality/variant-ids.md",sourceDirName:"core-functionality",slug:"/core-functionality/variant-ids",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/core-functionality/variant-ids",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/core-functionality/variant-ids.md",tags:[],version:"3.14",frontMatter:{title:"Variant IDs"},sidebar:"version-3.14/docs",previous:{title:"Custom Annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/file-formats/custom-annotations"},next:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/core-functionality/gene-fusions"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Small Variants",id:"small-variants",children:[{value:"VCF 
Examples",id:"vcf-examples",children:[],level:3},{value:"Format",id:"format",children:[],level:3},{value:"VID Examples",id:"vid-examples",children:[],level:3}],level:2},{value:"Translocation Breakends",id:"translocation-breakends",children:[{value:"VCF Example",id:"vcf-example",children:[],level:3},{value:"Format",id:"format-1",children:[],level:3},{value:"VID Example",id:"vid-example",children:[],level:3}],level:2},{value:"All Other Structural Variants",id:"all-other-structural-variants",children:[{value:"VCF Examples",id:"vcf-examples-1",children:[],level:3},{value:"Format",id:"format-2",children:[],level:3},{value:"VID Examples",id:"vid-examples-1",children:[],level:3}],level:2}],c={toc:s},p="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Many downstream tools use a variant identifier to store annotation results. We've standardized on using variant identifiers (VIDs) that originated from the notation used by the Broad Institute."),(0,r.kt)("p",null,"The Broad VID scheme is not only simple, but it has the advantage that a user could create a bare bones VCF entry from the information captured in the identifier. One of the limitations of the Broad VID scheme is that it does not define how to handle structural variants. 
Our VID scheme attempts to fill that gap."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Conventions")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("ul",{parentName:"div"},(0,r.kt)("li",{parentName:"ul"},"all chromosomes use Ensembl style notation (i.e. 22 instead of chr22)"),(0,r.kt)("li",{parentName:"ul"},"for a reference variant (i.e. no alt allele), replace the period (.) with the reference base"),(0,r.kt)("li",{parentName:"ul"},"padding bases are used, neither the reference nor alternate allele can be empty"),(0,r.kt)("li",{parentName:"ul"},"some large variant callers lazily output N for the reference allele. If this is the case, replace it with the true reference base")))),(0,r.kt)("h2",{id:"small-variants"},"Small Variants"),(0,r.kt)("h3",{id:"vcf-examples"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 66507 . T A 184.45 PASS .\nchr1 66521 . T TATATA 144.53 PASS .\nchr1 66572 . 
GTA G,GTACTATATATTATA 45.45 PASS .\n")),(0,r.kt)("h3",{id:"format"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-examples"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-66507-T-A"),(0,r.kt)("li",{parentName:"ul"},"1-66521-T-TATATA"),(0,r.kt)("li",{parentName:"ul"},"1-66572-GTA-G"),(0,r.kt)("li",{parentName:"ul"},"1-66572-G-GTACTATATATTA")),(0,r.kt)("h2",{id:"translocation-breakends"},"Translocation Breakends"),(0,r.kt)("h3",{id:"vcf-example"},"VCF Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 2617277 . A AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[ . PASS SVTYPE=BND\n")),(0,r.kt)("h3",{id:"format-1"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele")),(0,r.kt)("h3",{id:"vid-example"},"VID Example"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-2617277-A-AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911[")),(0,r.kt)("h2",{id:"all-other-structural-variants"},"All Other Structural Variants"),(0,r.kt)("h3",{id:"vcf-examples-1"},"VCF Examples"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 1000 . G . PASS END=3001000;SVTYPE=ROH\nchr1 1350082 . G . PASS END=1351320;SVTYPE=DEL\nchr1 1477854 . C . PASS END=1477984;SVTYPE=DUP\nchr1 1477968 . T . PASS END=1477968;SVTYPE=INS\nchr1 1715898 . N . PASS SVTYPE=CNV;END=1750149\nchr1 2650426 . N . PASS SVTYPE=CNV;END=2653074\nchr2 321682 . T . PASS SVTYPE=INV;END=421681\nchr20 2633403 . G . 
PASS END=2633421\n")),(0,r.kt)("h3",{id:"format-2"},"Format"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},"chromosome"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"end position"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"reference allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"alternate allele"),"\u2014",(0,r.kt)("inlineCode",{parentName:"p"},"SVTYPE")),(0,r.kt)("h3",{id:"vid-examples-1"},"VID Examples"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"1-1000-3001000-G-","<","ROH",">","-ROH"),(0,r.kt)("li",{parentName:"ul"},"1-1350082-1351320-G-","<","DEL",">","-DEL"),(0,r.kt)("li",{parentName:"ul"},"1-1477854-1477984-C-","<","DUP:TANDEM",">","-DUP"),(0,r.kt)("li",{parentName:"ul"},"1-1477968-1477968-T-","<","INS",">","-INS"),(0,r.kt)("li",{parentName:"ul"},"1-1715898-1750149-A-","<","DUP",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(replace the N with A)")),(0,r.kt)("li",{parentName:"ul"},"1-2650426-2653074-N-","<","DEL",">","-CNV ",(0,r.kt)("strong",{parentName:"li"},"(keep the N)")),(0,r.kt)("li",{parentName:"ul"},"2-321682-421681-T-","<","INV",">","-INV"),(0,r.kt)("li",{parentName:"ul"},"20-2633403-2633421-G-","<","STR2",">","-STR")))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/d4c3815d.0c7d632f.js b/assets/js/d4c3815d.0c7d632f.js deleted file mode 100644 index 8bfc79ed..00000000 --- a/assets/js/d4c3815d.0c7d632f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2472],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>g});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return 
Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var n=c(e.components);return a.createElement(l.Provider,{value:n},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},u=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,r=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),d=c(t),u=i,g=d["".concat(l,".").concat(u)]||d[u]||m[u]||r;return t?a.createElement(g,o(o({ref:n},p),{},{components:t})):a.createElement(g,o({ref:n},p))}));function g(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,o=new Array(r);o[0]=u;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[d]="string"==typeof e?e:i,o[1]=s;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>s,toc:()=>l});var a=t(87462),i=(t(67294),t(3905));const r={title:"Gene Fusion Detection"},o=void 0,s={unversionedId:"core-functionality/gene-fusions",id:"version-3.16/core-functionality/gene-fusions",title:"Gene Fusion Detection",description:"Overview",source:"@site/versioned_docs/version-3.16/core-functionality/gene-fusions.md",sourceDirName:"core-functionality",slug:"/core-functionality/gene-fusions",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/gene-fusions",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/core-functionality/gene-fusions.md",tags:[],version:"3.16",frontMatter:{title:"Gene Fusion 
Detection"},sidebar:"version-3.16/docs",previous:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/canonical-transcripts"},next:{title:"MNV Recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/mnv-recomposition"}},l=[{value:"Overview",id:"overview",children:[],level:2},{value:"Approach",id:"approach",children:[{value:"Variant Types",id:"variant-types",children:[],level:3},{value:"Criteria",id:"criteria",children:[],level:3}],level:2},{value:"ETV6/RUNX1 Example",id:"etv6runx1-example",children:[{value:"VCF",id:"vcf",children:[],level:3},{value:"JSON Output",id:"json-output",children:[{value:"Gene Fusion Data Sources",id:"gene-fusion-data-sources",children:[],level:4},{value:"Consequences",id:"consequences",children:[],level:4},{value:"Gene Fusions Section",id:"gene-fusions-section",children:[],level:4}],level:3}],level:2}],c={toc:l},p="wrapper";function d(e){let{components:n,...r}=e;return(0,i.kt)(p,(0,a.Z)({},c,r,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. 
When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed."),(0,i.kt)("p",null,"Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana."),(0,i.kt)("p",null,"The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(90306).Z})),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. ",(0,i.kt)("a",{parentName:"p",href:"https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-015-0252-1"},"Landscape of gene fusions in epithelial cancers: seq and ye shall find"),". Genome Med 7, 129 (2015)"))),(0,i.kt)("h2",{id:"approach"},"Approach"),(0,i.kt)("p",null,"Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. Let's consider two transcripts, ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_014206.3")," (",(0,i.kt)("strong",{parentName:"p"},"TMEM258"),") and ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_013402.4")," (",(0,i.kt)("strong",{parentName:"p"},"FADS1"),"). Both of these genes are on the reverse strand in the genome. 
The vertical bar indicates the breakpoint where these transcripts are fused:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 transcripts",src:t(31195).Z})),(0,i.kt)("p",null,"The above explains where the transcripts are fused together, but it doesn't explain in which orientation. By using the directionality encoded in the translocation breakend, we can rearrange these two transcripts in four ways:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 gene fusions",src:t(72746).Z})),(0,i.kt)("p",null,"Only two of the combinations yields a fusion contains both the transcription start site (TSS) and the stop codon. In one case, we can even detect an in-frame gene fusion."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Interpreting translocation breakends")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"At first glance, translocation breakends are a bit daunting. However, once you understand how they work, they're actually quite simple. 
For more information, we recommend reading section 5.4 in the ",(0,i.kt)("a",{parentName:"p",href:"https://samtools.github.io/hts-specs/VCFv4.2.pdf"},"VCF 4.2 specification"),"."),(0,i.kt)("table",{parentName:"div"},(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"REF"),(0,i.kt)("th",{parentName:"tr",align:"left"},"ALT"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Meaning"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t[p["),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the right of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t]p]"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending left of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"]p]t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the left of p is joined before t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"[p[t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending right of p is joined before t")))))),(0,i.kt)("h3",{id:"variant-types"},"Variant Types"),(0,i.kt)("p",null,"Specifically we can identify gene fusions from the following structural variant types:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"deletions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"tandem_duplications (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"inversions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"translocation breakpoints 
(",(0,i.kt)("inlineCode",{parentName:"li"},"AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911["),") ")),(0,i.kt)("h3",{id:"criteria"},"Criteria"),(0,i.kt)("p",null,"The following criteria must be met for Nirvana to identify a gene fusion:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"After accounting for gene orientation and genomic rearrangements, both transcripts must have the same orientation"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must belong to different genes"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts cannot have a coding region that already overlaps without the variant (i.e. in cases where two genes naturally overlap, we don't want to call a gene fusion)")),(0,i.kt)("h2",{id:"etv6runx1-example"},"ETV6/RUNX1 Example"),(0,i.kt)("p",null,"ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). Patients with this translocation are associated with a good prognosis and excellent response to treatment."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sun C., Chang L., Zhu X. 
",(0,i.kt)("a",{parentName:"p",href:"https://www.oncotarget.com/article/16367/text/"},"Pathogenesis of ETV6/RUNX1-positive childhood acute lymphoblastic leukemia and mechanisms underlying its relapse"),". Oncotarget. 2017; 8: 35445-35459"))),(0,i.kt)("h3",{id:"vcf"},"VCF"),(0,i.kt)("p",null,"Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\nchr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND\nchr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND\nchr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND\nchr21 36420865 . C [chr12:12026270[C . PASS SVTYPE=BND\n")),(0,i.kt)("p",null,"When you put these calls together, the resulting genomic rearrangement looks something like this:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(57062).Z})),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)("p",null,"The annotation for the first variant in the VCF looks like this:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{139,141-198,211,213-222}","{139,141-198,211,213-222}":!0},'{\n "chromosome": "chr12",\n "position": 12026270,\n "refAllele": "C",\n "altAlleles": [\n "[chr21:36420865[C"\n ],\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "12p13.2",\n "clingen": [\n {\n "chromosome": "12",\n "begin": 173786,\n "end": 34835837,\n "variantType": "copy_number_gain",\n "id": "nsv995956",\n "clinicalInterpretation": "pathogenic",\n "phenotypes": [\n "Decreased calvarial ossification",\n "Delayed gross motor development",\n "Feeding difficulties",\n "Frontal bossing",\n "Morphological abnormality of the central nervous system",\n "Patchy alopecia"\n ],\n "phenotypeIds": [\n "HP:0002007",\n "HP:0002011",\n "HP:0002194",\n "HP:0002232",\n "HP:0005474",\n "HP:0011968",\n "MedGen:C0232466",\n "MedGen:C1862862",\n "MedGen:CN001816",\n 
"MedGen:CN001820",\n "MedGen:CN001989",\n "MedGen:CN004852"\n ],\n "observedGains": 1,\n "validated": true\n }\n ],\n "variants": [\n {\n "vid": "12-12026270-C-[chr21:36420865[C",\n "chromosome": "chr12",\n "begin": 12026270,\n "end": 12026270,\n "isStructuralVariant": true,\n "refAllele": "C",\n "altAllele": "[chr21:36420865[C",\n "variantType": "translocation_breakend",\n "cosmicGeneFusions": [\n {\n "id": "COSF2245",\n "numSamples": 249,\n "geneSymbols": [\n "ETV6",\n "RUNX1"\n ],\n "hgvsr": "ENST00000396373.4(ETV6):r.1_1283::ENST00000300305.3(RUNX1):r.504_6222",\n "histologies": [\n {\n "name": "acute lymphoblastic B cell leukaemia",\n "numSamples": 169\n },\n {\n "name": "acute lymphoblastic leukaemia",\n "numSamples": 80\n }\n ],\n "sites": [\n {\n "name": "haematopoietic and lymphoid tissue",\n "numSamples": 249\n }\n ],\n "pubMedIds": [\n 7761424,\n 7780150,\n 8609706,\n 8751464,\n 8982044,\n 9067587,\n 9207408,\n 9226156,\n 9628428,\n 10463610,\n 10774753,\n 11091202,\n 12621238,\n 12661004,\n 12750722,\n 15104290,\n 15642392,\n 24557455,\n 26925663\n ]\n }\n ],\n "fusionCatcher": [\n {\n "genes": {\n "first": {\n "hgnc": "ETV6",\n "isOncogene": true\n },\n "second": {\n "hgnc": "RUNX1",\n "isOncogene": true\n }\n },\n "somaticSources": [\n "DepMap CCLE",\n "Cancer Genome Project",\n "ChimerKB 4.0",\n "ChimerPub 4.0",\n "ChimerSeq 4.0",\n "Known",\n "Mitelman DB",\n "OncoKB",\n "TICdb"\n ]\n }\n ],\n "transcripts": [\n {\n "transcript": "ENST00000396373.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "ENSG00000139083",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "ENST00000437180.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000437180.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000300305.3",\n "bioType": 
"protein_coding",\n "intron": 1,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000300305.3(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000482318.1",\n "bioType": "nonsense_mediated_decay",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000482318.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000486278.2",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000486278.2(RUNX1):r.?_-15+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000455571.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000455571.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000475045.2",\n "bioType": "protein_coding",\n "intron": 11,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000475045.2(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000416754.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000416754.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n }\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000379658.3"\n },\n {\n "transcript": "NM_001987.4",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "2120",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"\n }\n ],\n "isCanonical": true,\n "proteinId": "NP_001978.1"\n }\n ]\n }\n 
]\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,i.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"exon that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"intron that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. 
MSH6")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA nomenclature")))),(0,i.kt)("h4",{id:"gene-fusion-data-sources"},"Gene Fusion Data Sources"),(0,i.kt)("p",null,"To provide more context to our gene fusions, we provide the following gene fusion data sources:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/cosmic"},"COSMIC")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/fusioncatcher"},"FusionCatcher"))),(0,i.kt)("h4",{id:"consequences"},"Consequences"),(0,i.kt)("p",null,"When a gene fusion is identified, we add the following Sequence Ontology consequence:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{3}","{3}":!0},' "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n')),(0,i.kt)("h4",{id:"gene-fusions-section"},"Gene Fusions Section"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"geneFusions")," section is contained within the object of the originating transcript. It will contain all the pairwise gene fusions that obey the criteria outline above. In the case of ",(0,i.kt)("inlineCode",{parentName:"p"},"ENST00000396373.4"),", there 7 other Ensembl transcripts that would produce a gene fusion. For ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4"),", there was only one transcript (",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4"),") that produce a gene fusion."),(0,i.kt)("p",null,"For each originating transcript, we report the following for each partner transcript:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"transcript ID"),(0,i.kt)("li",{parentName:"ul"},"gene ID"),(0,i.kt)("li",{parentName:"ul"},"HGNC gene symbol"),(0,i.kt)("li",{parentName:"ul"},"transcript bio type (e.g. 
protein_coding)"),(0,i.kt)("li",{parentName:"ul"},"intron or exon number containing the breakpoint"),(0,i.kt)("li",{parentName:"ul"},"HGVS RNA notation")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Before Nirvana 3.15, we provided HGVS coding notation. However, HGVS r. notation is more appropriate for these types fusion splicing events (see ",(0,i.kt)("a",{parentName:"p",href:"https://varnomen.hgvs.org/bg-material/consultation/svd-wg007"},"HGVS SVD-WG007"),")."))),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{8}","{8}":!0},' "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"\n }\n ],\n')),(0,i.kt)("p",null,"The HGVS RNA notation above indicates that the gene fusion starts with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4")," (RUNX1) until CDS position 58 and continues with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4")," (ETV6). 
",(0,i.kt)("inlineCode",{parentName:"p"},"1009+3367")," indicates that the fusion occurred 3367 bp within intron 2."))}d.isMDXComponent=!0},72746:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_GeneFusions-e5e3758ea9d2c07d3591e3801b2bf7e3.svg"},31195:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_Transcripts-fe1b9c6be1f7cbfefbce887f8cec5d58.svg"},57062:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/etv6-runx1-fusion-ec8f4312c9aca496bde0d6e2b1bbd50d.svg"},90306:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/gene-fusions-fig2-1cce8ac31b00465c8d36bdc47ec3309e.svg"}}]); \ No newline at end of file diff --git a/assets/js/d6dedfc9.c3b18bac.js b/assets/js/d6dedfc9.c3b18bac.js deleted file mode 100644 index f20d3f1f..00000000 --- a/assets/js/d6dedfc9.c3b18bac.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[228,1966],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>h});var o=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function a(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);t&&(o=o.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,o)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);for(o=0;o=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=o.createContext({}),p=function(e){var t=o.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=p(e.components);return o.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return o.createElement(o.Fragment,{},t)}},m=o.forwardRef((function(e,t){var 
n=e.components,r=e.mdxType,a=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=p(n),m=r,h=d["".concat(s,".").concat(m)]||d[m]||u[m]||a;return n?o.createElement(h,i(i({ref:t},c),{},{components:n})):o.createElement(h,i({ref:t},c))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var a=n.length,i=new Array(a);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,i[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>d,frontMatter:()=>a,metadata:()=>l,toc:()=>s});var o=n(87462),r=(n(67294),n(3905));const a={},i=void 0,l={unversionedId:"data-sources/phylop-json",id:"version-3.16/data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/phylop-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],p={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,o.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n "end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] 
\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}d.isMDXComponent=!0},87985:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>p});var o=n(87462),r=(n(67294),n(3905)),a=n(18301);const i={title:"PhyloP"},l=void 0,s={unversionedId:"data-sources/phylop",id:"version-3.16/data-sources/phylop",title:"PhyloP",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/phylop.mdx",sourceDirName:"data-sources",slug:"/data-sources/phylop",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/phylop",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/phylop.mdx",tags:[],version:"3.16",frontMatter:{title:"PhyloP"},sidebar:"version-3.16/docs",previous:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/omim"},next:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/primate-ai"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"WigFix File",id:"wigfix-file",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:p},d="wrapper";function u(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,o.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"PhyloP (phylogenetic p-values) conservation scores are obtained from the 
","[PHAST package]"," (",(0,r.kt)("a",{parentName:"p",href:"http://compgen.bscb.cornell.edu/phast/"},"http://compgen.bscb.cornell.edu/phast/"),") for multiple alignments of vertebrate genomes to the human genome. For GRCh38, the multiple alignments are against 19 mammals and for GRCh37, it is against 45 vertebrate genomes."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. ",(0,r.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. 
(",(0,r.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,r.kt)("h2",{id:"wigfix-file"},"WigFix File"),(0,r.kt)("p",null,"The data is provided in WigFix files which is a text file that provides conservation scores for contiguous intervals in the following format:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"fixedStep chrom=chr1 start=10918 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.064\nfixedStep chrom=chr1 start=34045 step=1\n0.111\n0.100\n0.111\n0.111\n0.100\n0.111\n0.111\n0.111\n0.100\n0.111\n-1.636\n")),(0,r.kt)("p",null,"We convert them to binary files with indexes for fast query. Note that these are scores for genomic positions and are reported only for SNVs."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,"GRCh37: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/"},"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/")),(0,r.kt)("p",null,"GRCh38: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/"},"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Unlike other supplemetary datasources, phyloP scores are reported in the variants section."),(0,r.kt)(a.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/da04ca12.eb25b4f1.js b/assets/js/da04ca12.eb25b4f1.js deleted file mode 100644 index 1f9bf6a1..00000000 --- a/assets/js/da04ca12.eb25b4f1.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6116],{24807:e=>{e.exports=JSON.parse('{"pluginId":"default","version":"3.2.5","label":"3.2.5","banner":"unmaintained","badge":true,"className":"docs-version-3.2.5","isLast":false,"docsSidebars":{"version-3.2.5/docs":[{"collapsed":true,"type":"category","label":"Introduction","items":[{"type":"link","label":"Introduction","href":"/IlluminaConnectedAnnotationsDocumentation/3.2.5/","docId":"introduction/introduction"},{"type":"link","label":"Dependencies","href":"/IlluminaConnectedAnnotationsDocumentation/3.2.5/introduction/dependencies","docId":"introduction/dependencies"},{"type":"link","label":"Getting Started","href":"/IlluminaConnectedAnnotationsDocumentation/3.2.5/introduction/getting-started","docId":"introduction/getting-started"}],"collapsible":true},{"collapsed":true,"type":"category","label":"Data Sources","items":[{"type":"link","label":"1000 Genomes","href":"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/1000Genomes","docId":"data-sources/1000Genomes"},{"type":"link","label":"ClinVar","href":"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/clinvar","docId":"data-sources/clinvar"},{"type":"link","label":"dbSNP","href":"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/dbsnp","docId":"data-sources/dbsnp"},{"type":"link","label":"gnomAD","href":"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/gnomad","docId":"data-sources/gnomad"}],"collapsible":true},{"collapsed":true,"type":"category","label":"File Formats","items":[{"type":"link","label":"Nirvana JSON File Format","href":"/IlluminaConnectedAnnotationsDocumentation/3.2.5/file-formats/nirvana-json-file-format","docId":"file-formats/nirvana-json-file-format"}],"collapsible":true},{"collapsed":true,"type":"category","label":"Core Functionality","items":[{"type":"link","label":"Variant 
IDs","href":"/IlluminaConnectedAnnotationsDocumentation/3.2.5/core-functionality/variant-ids","docId":"core-functionality/variant-ids"},{"type":"link","label":"Gene Fusion Detection","href":"/IlluminaConnectedAnnotationsDocumentation/3.2.5/core-functionality/gene-fusions","docId":"core-functionality/gene-fusions"}],"collapsible":true}]},"docs":{"core-functionality/gene-fusions":{"id":"core-functionality/gene-fusions","title":"Gene Fusion Detection","description":"Overview","sidebar":"version-3.2.5/docs"},"core-functionality/variant-ids":{"id":"core-functionality/variant-ids","title":"Variant IDs","description":"Overview","sidebar":"version-3.2.5/docs"},"data-sources/1000Genomes":{"id":"data-sources/1000Genomes","title":"1000 Genomes","description":"Overview","sidebar":"version-3.2.5/docs"},"data-sources/1000Genomes-snv-json":{"id":"data-sources/1000Genomes-snv-json","title":"1000Genomes-snv-json","description":"| Field | Type | Notes |"},"data-sources/1000Genomes-sv-json":{"id":"data-sources/1000Genomes-sv-json","title":"1000Genomes-sv-json","description":"| Field | Type | Notes |"},"data-sources/clinvar":{"id":"data-sources/clinvar","title":"ClinVar","description":"Overview","sidebar":"version-3.2.5/docs"},"data-sources/clinvar-json":{"id":"data-sources/clinvar-json","title":"clinvar-json","description":"| Field | Type | Notes |"},"data-sources/dbsnp":{"id":"data-sources/dbsnp","title":"dbSNP","description":"Overview","sidebar":"version-3.2.5/docs"},"data-sources/dbsnp-json":{"id":"data-sources/dbsnp-json","title":"dbsnp-json","description":"| Field | Type | Notes |"},"data-sources/gnomad":{"id":"data-sources/gnomad","title":"gnomAD","description":"Overview","sidebar":"version-3.2.5/docs"},"data-sources/gnomad-exomes-small-variants-json":{"id":"data-sources/gnomad-exomes-small-variants-json","title":"gnomad-exomes-small-variants-json","description":"| Field | Type | Notes 
|"},"data-sources/gnomad-genomes-small-variants-json":{"id":"data-sources/gnomad-genomes-small-variants-json","title":"gnomad-genomes-small-variants-json","description":"| Field | Type | Notes |"},"file-formats/nirvana-json-file-format":{"id":"file-formats/nirvana-json-file-format","title":"Nirvana JSON File Format","description":"Overview","sidebar":"version-3.2.5/docs"},"introduction/dependencies":{"id":"introduction/dependencies","title":"Dependencies","description":"All of the following dependencies have been included in this repository.","sidebar":"version-3.2.5/docs"},"introduction/getting-started":{"id":"introduction/getting-started","title":"Getting Started","description":"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.","sidebar":"version-3.2.5/docs"},"introduction/introduction":{"id":"introduction/introduction","title":"Introduction","description":"Clinical-grade variant annotation","sidebar":"version-3.2.5/docs"}}}')}}]); \ No newline at end of file diff --git a/assets/js/dc5476b0.85c885a5.js b/assets/js/dc5476b0.85c885a5.js deleted file mode 100644 index da4eade5..00000000 --- a/assets/js/dc5476b0.85c885a5.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9198,8459],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>h});var o=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function a(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);t&&(o=o.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,o)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var 
a=Object.getOwnPropertySymbols(e);for(o=0;o=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=o.createContext({}),p=function(e){var t=o.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=p(e.components);return o.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return o.createElement(o.Fragment,{},t)}},m=o.forwardRef((function(e,t){var n=e.components,r=e.mdxType,a=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=p(n),m=r,h=d["".concat(s,".").concat(m)]||d[m]||u[m]||a;return n?o.createElement(h,i(i({ref:t},c),{},{components:n})):o.createElement(h,i({ref:t},c))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var a=n.length,i=new Array(a);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,i[1]=l;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>d,frontMatter:()=>a,metadata:()=>l,toc:()=>s});var o=n(87462),r=(n(67294),n(3905));const a={},i=void 0,l={unversionedId:"data-sources/phylop-json",id:"version-3.17/data-sources/phylop-json",title:"phylop-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/phylop-json.md",sourceDirName:"data-sources",slug:"/data-sources/phylop-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/phylop-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/phylop-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],p={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,o.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"variants":[\n {\n "vid":"2:48010488:A",\n "chromosome":"chr2",\n "begin":48010488,\n 
"end":48010488,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "phylopScore":0.459\n }\n] \n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phylopScore"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: -14.08 to 6.424")))))}d.isMDXComponent=!0},71341:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>u,frontMatter:()=>i,metadata:()=>s,toc:()=>p});var o=n(87462),r=(n(67294),n(3905)),a=n(61689);const i={title:"PhyloP"},l=void 0,s={unversionedId:"data-sources/phylop",id:"version-3.17/data-sources/phylop",title:"PhyloP",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/phylop.mdx",sourceDirName:"data-sources",slug:"/data-sources/phylop",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/phylop",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/phylop.mdx",tags:[],version:"3.17",frontMatter:{title:"PhyloP"},sidebar:"version-3.17/docs",previous:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/omim"},next:{title:"Primate AI",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/primate-ai"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"WigFix File",id:"wigfix-file",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:p},d="wrapper";function 
u(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,o.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"PhyloP (phylogenetic p-values) conservation scores are obtained from the ","[PHAST package]"," (",(0,r.kt)("a",{parentName:"p",href:"http://compgen.bscb.cornell.edu/phast/"},"http://compgen.bscb.cornell.edu/phast/"),") for multiple alignments of vertebrate genomes to the human genome. For GRCh38, the multiple alignments are against 19 mammals and for GRCh37, it is against 45 vertebrate genomes."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Siepel A, Bejerano G, Pedersen JS, Hinrichs AS, Hou M, Rosenbloom K, Clawson H, Spieth J, Hillier LW, Richards S, et al. Evolutionarily conserved elements in vertebrate, insect, worm, and yeast genomes. ",(0,r.kt)("strong",{parentName:"p"},"Genome Res. 2005")," Aug;15(8):1034-50. 
(",(0,r.kt)("a",{parentName:"p",href:"http://www.genome.org/cgi/doi/10.1101/gr.3715005"},"http://www.genome.org/cgi/doi/10.1101/gr.3715005"),")"))),(0,r.kt)("h2",{id:"wigfix-file"},"WigFix File"),(0,r.kt)("p",null,"The data is provided in WigFix files which is a text file that provides conservation scores for contiguous intervals in the following format:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"fixedStep chrom=chr1 start=10918 step=1\n0.064\n0.058\n0.064\n0.058\n0.064\n0.064\nfixedStep chrom=chr1 start=34045 step=1\n0.111\n0.100\n0.111\n0.111\n0.100\n0.111\n0.111\n0.111\n0.100\n0.111\n-1.636\n")),(0,r.kt)("p",null,"We convert them to binary files with indexes for fast query. Note that these are scores for genomic positions and are reported only for SNVs."),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,"GRCh37: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/"},"http://hgdownload.cse.ucsc.edu/goldenpath/hg19/phyloP46way/vertebrate/")),(0,r.kt)("p",null,"GRCh38: ",(0,r.kt)("a",{parentName:"p",href:"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/"},"http://hgdownload.cse.ucsc.edu/goldenPath/hg38/phyloP20way/")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Unlike other supplemetary datasources, phyloP scores are reported in the variants section."),(0,r.kt)(a.default,{mdxType:"JSON"}))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/dcd47c64.c08be6b8.js b/assets/js/dcd47c64.c08be6b8.js deleted file mode 100644 index effdb23b..00000000 --- a/assets/js/dcd47c64.c08be6b8.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2973],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>k});var a=n(67294);function r(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),s=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},m=function(e){var t=s(e.components);return a.createElement(p.Provider,{value:t},e.children)},d="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,m=l(e,["components","mdxType","originalType","parentName"]),d=s(n),g=r,k=d["".concat(p,".").concat(g)]||d[g]||c[g]||i;return n?a.createElement(k,o(o({ref:t},m),{},{components:n})):a.createElement(k,o({ref:t},m))}));function k(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=g;var l={};for(var p in t)hasOwnProperty.call(t,p)&&(l[p]=t[p]);l.originalType=e,l[d]="string"==typeof e?e:r,o[1]=l;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const i={title:"MNV Recomposition"},o=void 0,l={unversionedId:"core-functionality/mnv-recomposition",id:"version-3.17/core-functionality/mnv-recomposition",title:"MNV 
Recomposition",description:"Overview",source:"@site/versioned_docs/version-3.17/core-functionality/mnv-recomposition.md",sourceDirName:"core-functionality",slug:"/core-functionality/mnv-recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/mnv-recomposition",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/core-functionality/mnv-recomposition.md",tags:[],version:"3.17",frontMatter:{title:"MNV Recomposition"},sidebar:"version-3.17/docs",previous:{title:"Gene Fusion Detection",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/gene-fusions"},next:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/variant-ids"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"Criteria",id:"criteria",children:[],level:2},{value:"Examples",id:"examples",children:[{value:"Multiple Samples",id:"multiple-samples",children:[],level:3},{value:"Phase Sets",id:"phase-sets",children:[{value:"Homozygous variants, same phase set",id:"homozygous-variants-same-phase-set",children:[],level:4},{value:"Mixing phased and unphased variants",id:"mixing-phased-and-unphased-variants",children:[],level:4},{value:"Variants in different phase sets",id:"variants-in-different-phase-sets",children:[],level:4},{value:"Unphased homozygous variants",id:"unphased-homozygous-variants",children:[],level:4},{value:"Homozygous variants are not commutative",id:"homozygous-variants-are-not-commutative",children:[],level:4}],level:3},{value:"Conflicting Genotypes",id:"conflicting-genotypes",children:[],level:3}],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],s={toc:p},m="wrapper";function d(e){let{components:t,...i}=e;return(0,r.kt)(m,(0,a.Z)({},s,i,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Most annotation tools handle variants independently. 
The problem with this approach is that nearby variants could affect the same codon leading to a very different annotation. For example, consider the following example (Danecek, 2017):"),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(90997).Z})),(0,r.kt)("p",null,"When handled independently, the two variants (C\u2192T & G\u2192A) would be annotated as missense annotations. However, if we consider them together, the resulting MNV would yield a stop gain."),(0,r.kt)("p",null,"By default, Nirvana identifies these types of cases where two or more SNVs would affect the same codon. In addition, it's able to perform this operation on VCFs containing large numbers of samples (we've tested this on 2,500+ samples using the 1000 Genomes Project VCF files)."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Petr Danecek, Shane A McCarthy, ",(0,r.kt)("a",{parentName:"p",href:"https://academic.oup.com/bioinformatics/article-abstract/33/13/2037/3000373"},"BCFtools/csq: haplotype-aware variant consequences"),", Bioinformatics, Volume 33, Issue 13, 1 July 2017, Pages 2037\u20132039"))),(0,r.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Supported variant types")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"At the moment, ",(0,r.kt)("strong",{parentName:"p"},"Nirvana only supports recomposing multiple SNVs into an MNV"),". The Danecek paper makes a compelling case for supporting frameshifting variants paired with frame-restoring variants. We've also received requests for supporting the recomposition of an SNV with insertions and deletions. While this is something we've looked into, it represents functionality that many of our clinical customers are not yet comfortable with."))),(0,r.kt)("h2",{id:"criteria"},"Criteria"),(0,r.kt)("p",null,"Nirvana will recompose a set of SNVs if two or more SNVs are located in the same codon for any codon in any of the overlapping transcripts."),(0,r.kt)("p",null,"The following criteria must also be met for at least one sample:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"Genotypes are provided for the VCF variants and all variants are in phase or homozygous variant."),(0,r.kt)("li",{parentName:"ol"},"All the available phase set IDs are the same (homozygous variants are available to all phase sets)"),(0,r.kt)("li",{parentName:"ol"},"The genotype ploidy for all the variants are the same."),(0,r.kt)("li",{parentName:"ol"},"No unsupported variant type (i.e. 
insertion or deletion) overlaps the recomposed variants"),(0,r.kt)("li",{parentName:"ol"},"The first and last base in at least one of the recomposed alleles must be non-reference.")),(0,r.kt)("h2",{id:"examples"},"Examples"),(0,r.kt)("p",null,"During variant recomposition, if two SNVs affect the same codon, it becomes the seed codon. If there are SNVs in the adjacent codons, they will be aggregated into the seed codon."),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Three SNVs in two adjacent codons. The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"ATAG"),":\n",(0,r.kt)("img",{src:n(12481).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Three SNVs in two adjacent codons (larger distance). The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"ATATCC"),":\n",(0,r.kt)("img",{src:n(12527).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Nirvana can use ",(0,r.kt)("strong",{parentName:"p"},"multiple reading frames")," to aggregate the seed codon. In this example, the seed codon is highlighted in green. If we look at reading frame 1, we see that the T\u2192A variant occurs in the ",(0,r.kt)("inlineCode",{parentName:"p"},"ACT")," codon. The adjacent codon to the left also has a variant C\u2192T. As a result, there can be up to four bases between SNVs when aggregating the flanking codons. The recomposed alternate allele is ",(0,r.kt)("inlineCode",{parentName:"p"},"TTCACATAGCACTCAC"),":\n",(0,r.kt)("img",{src:n(27411).Z}))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("p",{parentName:"li"},"Nothing will be recomposed if there's no seed codon:\n",(0,r.kt)("img",{src:n(65928).Z})))),(0,r.kt)("h3",{id:"multiple-samples"},"Multiple Samples"),(0,r.kt)("p",null,"Recomposing variants while handling multiple samples can be complex. The recomposition criteria described above often leads to sample-specific recomposed variants. 
Here we show the recomposition of three variants with sample-specific criteria marked in bold:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 1"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 2"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Sample 3"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"td"},"0/1")),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 
3"),(0,r.kt)("td",{parentName:"tr",align:"center"},"102"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"td"},".")),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG, CG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"ACT"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CCT, CCA"),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"."),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2")))),(0,r.kt)("p",null,"In the example above, the heterozygous genotype in sample 1 at position 101 would prevent the MNVs from being recomposed. 
Similarly, the unknown genotype for sample 2 at position 102 would produce a smaller MNV than the one expressed for sample 3."),(0,r.kt)("h3",{id:"phase-sets"},"Phase Sets"),(0,r.kt)("h4",{id:"homozygous-variants-same-phase-set"},"Homozygous variants, same phase set"),(0,r.kt)("p",null,"Recomposed phase set becomes ",(0,r.kt)("inlineCode",{parentName:"p"},".")," since homozygous variants belong to all phase sets."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"mixing-phased-and-unphased-variants"},"Mixing phased and unphased variants"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG,TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")))),(0,r.kt)("h4",{id:"variants-in-different-phase-sets"},"Variants in different phase sets"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG,TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"unphased-homozygous-variants"},"Unphased homozygous variants"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed 
Variant"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1/1"),(0,r.kt)("td",{parentName:"tr",align:"center"},".")))),(0,r.kt)("h4",{id:"homozygous-variants-are-not-commutative"},"Homozygous variants are not commutative"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"C"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Decomposed Variant 3"),(0,r.kt)("td",{parentName:"tr",align:"center"},"102"),(0,r.kt)("td",{parentName:"tr",align:"center"},"G"),(0,r.kt)("td",{parentName:"tr",align:"center"},"T"),(0,r.kt)("td",{parentName:"tr",align:"center"},"0","|","1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")))),(0,r.kt)("p",null,"In 
this example, the homozygous variant at position 101 cannot bridge the gap between other two variants since there could be a switching error between phase sets 567 & 890. As a result, we have to create two overlapping MNVs:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"}),(0,r.kt)("th",{parentName:"tr",align:"center"},"POS"),(0,r.kt)("th",{parentName:"tr",align:"center"},"REF"),(0,r.kt)("th",{parentName:"tr",align:"center"},"ALT"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Genotype"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Phase Set"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 1"),(0,r.kt)("td",{parentName:"tr",align:"center"},"100"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AC"),(0,r.kt)("td",{parentName:"tr",align:"center"},"AG, TG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"567")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"Recomposed Variant 2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"101"),(0,r.kt)("td",{parentName:"tr",align:"center"},"CG"),(0,r.kt)("td",{parentName:"tr",align:"center"},"GG, GT"),(0,r.kt)("td",{parentName:"tr",align:"center"},"1","|","2"),(0,r.kt)("td",{parentName:"tr",align:"center"},"890")))),(0,r.kt)("h3",{id:"conflicting-genotypes"},"Conflicting Genotypes"),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)("p",null,"Given the following VCF entries:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT S1 S2 S3\nchr1 12861477 . T C . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477\nchr1 12861478 . G A . PASS . GT:PS 0/0:. 0/0:. 0|1:12861477\n")),(0,r.kt)("p",null,"Each original variant would be annotated as usual. 
The difference is that both will now have a ",(0,r.kt)("inlineCode",{parentName:"p"},"isDecomposedVariant")," flag set to true in addition to an entry in the ",(0,r.kt)("inlineCode",{parentName:"p"},"linkedVids")," field that points to the new MNV:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json",metastring:"{31-34,70-73}","{31-34,70-73}":!0},'{\n "chromosome":"chr1",\n "position":12861477,\n "refAllele":"T",\n "altAlleles":[\n "C"\n ],\n "filters":[\n "PASS"\n ],\n "samples":[\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0|1",\n }\n ],\n "variants":[\n {\n "vid":"1-12861477-T-C",\n "chromosome":"chr1",\n "begin":12861477,\n "end":12861477,\n "refAllele":"T",\n "altAllele":"C",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "linkedVids":[\n "1-12861477-TG-CA"\n ],\n "hgvsg":"NC_000001.11:g.12861477T>C",\n "transcripts":[ ... ]\n }\n ]\n},\n{\n "chromosome":"chr1",\n "position":12861478,\n "refAllele":"G",\n "altAlleles":[\n "A"\n ],\n "filters":[\n "PASS"\n ],\n "samples":[\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0/0",\n },\n {\n "genotype":"0|1",\n }\n ],\n "variants":[\n {\n "vid":"1-12861478-G-A",\n "chromosome":"chr1",\n "begin":12861478,\n "end":12861478,\n "refAllele":"G",\n "altAllele":"A",\n "variantType":"SNV",\n "isDecomposedVariant":true,\n "linkedVids":[\n "1-12861477-TG-CA"\n ],\n "hgvsg":"NC_000001.11:g.12861478G>A",\n "transcripts":[ ... 
]\n }\n ]\n}\n')),(0,r.kt)("p",null,"The recomposed variant gets a separate entry where the ",(0,r.kt)("inlineCode",{parentName:"p"},"isRecomposedVariant")," flag is set to true and the ",(0,r.kt)("inlineCode",{parentName:"p"},"linkedVids")," field links to the constituent SNVs:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json",metastring:"{32-36}","{32-36}":!0},' {\n "chromosome": "chr1",\n "position": 12861477,\n "refAllele": "TG",\n "altAlleles": [\n "CA"\n ],\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "1p36.21",\n "samples": [\n {\n "genotype": "0|0"\n },\n {\n "genotype": "0|0"\n },\n {\n "genotype": "0|1"\n }\n ],\n "variants": [\n {\n "vid": "1-12861477-TG-CA",\n "chromosome": "chr1",\n "begin": 12861477,\n "end": 12861478,\n "refAllele": "TG",\n "altAllele": "CA",\n "variantType": "MNV",\n "isRecomposedVariant": true,\n "linkedVids": [\n "1-12861477-T-C",\n "1-12861478-G-A"\n ],\n "hgvsg": "NC_000001.11:g.12861477_12861478inv",\n "transcripts":[ ... ]\n ]\n }\n ]\n },\n')),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Recomposed QUAL, FILTER, and GQ")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Although the example above does not demonstrate it, Nirvana tries to set the quality score, filter, and genotype quality (GQ) for the recomposed variant. 
The QUAL score is calculated to be the ",(0,r.kt)("strong",{parentName:"p"},"minimum")," QUAL score for all the constituent SNVs. The same method is used for the genotype quality (GQ) scores. For the ",(0,r.kt)("inlineCode",{parentName:"p"},"filters")," field, ",(0,r.kt)("inlineCode",{parentName:"p"},"PASS")," will be used if all constituent variants passed their filters, otherwise we set it to ",(0,r.kt)("inlineCode",{parentName:"p"},"FilteredVariantsRecomposed"),"."))))}d.isMDXComponent=!0},90997:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/BCFtools-csq-fig1a-a266b0be1c6d74f085fcacb2f433f750.png"},27411:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/multiple-reading-frames-19e896fe74a8781afdd1fa2539edff88.png"},65928:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/no-recomposition-b63eb855b0ed62b8ae331eafc538223d.png"},12527:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/three-SNVs-larger-separation-85b12d5bafd32ee312103a1b9b588720.png"},12481:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/images/three-SNVs-two-codons-bc45a465809b53d51dbfb32deaa6324a.png"}}]); \ No newline at end of file diff --git a/assets/js/de297997.3756a6e7.js b/assets/js/de297997.3756a6e7.js deleted file mode 100644 index 18ccb495..00000000 --- a/assets/js/de297997.3756a6e7.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7128],{91858:e=>{e.exports=JSON.parse('{"pluginId":"default","version":"3.16","label":"3.16","banner":"unmaintained","badge":true,"className":"docs-version-3.16","isLast":false,"docsSidebars":{"version-3.16/docs":[{"type":"category","label":"Introduction","items":[{"type":"link","label":"Introduction","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/","docId":"introduction/introduction"},{"type":"link","label":"Dependencies","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/dependencies","docId":"introduction/dependencies"},{"type":"link","label":"Getting Started","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/getting-started","docId":"introduction/getting-started"},{"type":"link","label":"Parsing Nirvana JSON","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/parsing-json","docId":"introduction/parsing-json"},{"type":"link","label":"Annotating COVID-19","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/covid19","docId":"introduction/covid19"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Data Sources","items":[{"type":"link","label":"1000 Genomes","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/1000Genomes","docId":"data-sources/1000Genomes"},{"type":"link","label":"Amino Acid 
Conservation","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/amino-acid-conservation","docId":"data-sources/amino-acid-conservation"},{"type":"link","label":"ClinGen","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen","docId":"data-sources/clingen"},{"type":"link","label":"ClinVar","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clinvar","docId":"data-sources/clinvar"},{"type":"link","label":"COSMIC","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/cosmic","docId":"data-sources/cosmic"},{"type":"link","label":"dbSNP","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/dbsnp","docId":"data-sources/dbsnp"},{"type":"link","label":"FusionCatcher","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/fusioncatcher","docId":"data-sources/fusioncatcher"},{"type":"link","label":"gnomAD","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/gnomad","docId":"data-sources/gnomad"},{"type":"link","label":"Mitochondrial Heteroplasmy","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mito-heteroplasmy","docId":"data-sources/mito-heteroplasmy"},{"type":"link","label":"MITOMAP","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/mitomap","docId":"data-sources/mitomap"},{"type":"link","label":"OMIM","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/omim","docId":"data-sources/omim"},{"type":"link","label":"PhyloP","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/phylop","docId":"data-sources/phylop"},{"type":"link","label":"Primate AI","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/primate-ai","docId":"data-sources/primate-ai"},{"type":"link","label":"REVEL","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/revel","docId":"data-sources/revel"},{"type":"link","label":"Splice 
AI","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/splice-ai","docId":"data-sources/splice-ai"},{"type":"link","label":"TOPMed","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/topmed","docId":"data-sources/topmed"}],"collapsible":true,"collapsed":true},{"type":"category","label":"File Formats","items":[{"type":"link","label":"Nirvana JSON File Format","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/file-formats/nirvana-json-file-format","docId":"file-formats/nirvana-json-file-format"},{"type":"link","label":"Custom Annotations","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/file-formats/custom-annotations","docId":"file-formats/custom-annotations"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Core Functionality","items":[{"type":"link","label":"Canonical Transcripts","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/canonical-transcripts","docId":"core-functionality/canonical-transcripts"},{"type":"link","label":"Gene Fusion Detection","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/gene-fusions","docId":"core-functionality/gene-fusions"},{"type":"link","label":"MNV Recomposition","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/mnv-recomposition","docId":"core-functionality/mnv-recomposition"},{"type":"link","label":"Variant IDs","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/variant-ids","docId":"core-functionality/variant-ids"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Utilities","items":[{"type":"link","label":"Jasix","href":"/IlluminaConnectedAnnotationsDocumentation/3.16/utilities/jasix","docId":"utilities/jasix"}],"collapsible":true,"collapsed":true}]},"docs":{"core-functionality/canonical-transcripts":{"id":"core-functionality/canonical-transcripts","title":"Canonical 
Transcripts","description":"Overview","sidebar":"version-3.16/docs"},"core-functionality/gene-fusions":{"id":"core-functionality/gene-fusions","title":"Gene Fusion Detection","description":"Overview","sidebar":"version-3.16/docs"},"core-functionality/mnv-recomposition":{"id":"core-functionality/mnv-recomposition","title":"MNV Recomposition","description":"Overview","sidebar":"version-3.16/docs"},"core-functionality/variant-ids":{"id":"core-functionality/variant-ids","title":"Variant IDs","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/1000Genomes":{"id":"data-sources/1000Genomes","title":"1000 Genomes","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/1000Genomes-snv-json":{"id":"data-sources/1000Genomes-snv-json","title":"1000Genomes-snv-json","description":"| Field | Type | Notes |"},"data-sources/1000Genomes-sv-json":{"id":"data-sources/1000Genomes-sv-json","title":"1000Genomes-sv-json","description":"| Field | Type | Notes |"},"data-sources/amino-acid-conservation":{"id":"data-sources/amino-acid-conservation","title":"Amino Acid Conservation","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/amino-acid-conservation-json":{"id":"data-sources/amino-acid-conservation-json","title":"amino-acid-conservation-json","description":"| Field | Type | Notes |"},"data-sources/clingen":{"id":"data-sources/clingen","title":"ClinGen","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/clingen-dosage-json":{"id":"data-sources/clingen-dosage-json","title":"clingen-dosage-json","description":"| Field | Type | Notes |"},"data-sources/clingen-gene-validity-json":{"id":"data-sources/clingen-gene-validity-json","title":"clingen-gene-validity-json","description":"| Field | Type | Notes |"},"data-sources/clingen-json":{"id":"data-sources/clingen-json","title":"clingen-json","description":"| Field | Type | Notes 
|"},"data-sources/clinvar":{"id":"data-sources/clinvar","title":"ClinVar","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/clinvar-json":{"id":"data-sources/clinvar-json","title":"clinvar-json","description":"| Field | Type | Notes |"},"data-sources/cosmic":{"id":"data-sources/cosmic","title":"COSMIC","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/cosmic-json":{"id":"data-sources/cosmic-json","title":"cosmic-json","description":"| Field | Type | Notes |"},"data-sources/dbsnp":{"id":"data-sources/dbsnp","title":"dbSNP","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/dbsnp-json":{"id":"data-sources/dbsnp-json","title":"dbsnp-json","description":"| Field | Type | Notes |"},"data-sources/fusioncatcher":{"id":"data-sources/fusioncatcher","title":"FusionCatcher","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/fusioncatcher-json":{"id":"data-sources/fusioncatcher-json","title":"fusioncatcher-json","description":"| Field | Type | Notes |"},"data-sources/gnomad":{"id":"data-sources/gnomad","title":"gnomAD","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/gnomad-lof-json":{"id":"data-sources/gnomad-lof-json","title":"gnomad-lof-json","description":"| Field | Type | Notes |"},"data-sources/gnomad-small-variants-json":{"id":"data-sources/gnomad-small-variants-json","title":"gnomad-small-variants-json","description":"| Field | Type | Notes |"},"data-sources/mito-heteroplasmy":{"id":"data-sources/mito-heteroplasmy","title":"Mitochondrial Heteroplasmy","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/mitomap":{"id":"data-sources/mitomap","title":"MITOMAP","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/mitomap-small-variants-json":{"id":"data-sources/mitomap-small-variants-json","title":"mitomap-small-variants-json","description":"| Field | Type | Notes 
|"},"data-sources/mitomap-structural-variants-json":{"id":"data-sources/mitomap-structural-variants-json","title":"mitomap-structural-variants-json","description":"| Field | Type | Notes |"},"data-sources/omim":{"id":"data-sources/omim","title":"OMIM","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/omim-json":{"id":"data-sources/omim-json","title":"omim-json","description":"| Field | Type | Notes |"},"data-sources/phylop":{"id":"data-sources/phylop","title":"PhyloP","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/phylop-json":{"id":"data-sources/phylop-json","title":"phylop-json","description":"| Field | Type | Notes |"},"data-sources/primate-ai":{"id":"data-sources/primate-ai","title":"Primate AI","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/primate-ai-json":{"id":"data-sources/primate-ai-json","title":"primate-ai-json","description":"| Field | Type | Notes |"},"data-sources/revel":{"id":"data-sources/revel","title":"REVEL","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/revel-json":{"id":"data-sources/revel-json","title":"revel-json","description":"| Field | Type | Notes |"},"data-sources/splice-ai":{"id":"data-sources/splice-ai","title":"Splice AI","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/splice-ai-json":{"id":"data-sources/splice-ai-json","title":"splice-ai-json","description":"| Field | Type | Notes |"},"data-sources/topmed":{"id":"data-sources/topmed","title":"TOPMed","description":"Overview","sidebar":"version-3.16/docs"},"data-sources/topmed-json":{"id":"data-sources/topmed-json","title":"topmed-json","description":"| Field | Type | Notes |"},"file-formats/custom-annotations":{"id":"file-formats/custom-annotations","title":"Custom Annotations","description":"Overview","sidebar":"version-3.16/docs"},"file-formats/nirvana-json-file-format":{"id":"file-formats/nirvana-json-file-format","title":"Nirvana JSON File 
Format","description":"Overview","sidebar":"version-3.16/docs"},"introduction/covid19":{"id":"introduction/covid19","title":"Annotating COVID-19","description":"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.","sidebar":"version-3.16/docs"},"introduction/dependencies":{"id":"introduction/dependencies","title":"Dependencies","description":"All of the following dependencies have been included in this repository.","sidebar":"version-3.16/docs"},"introduction/getting-started":{"id":"introduction/getting-started","title":"Getting Started","description":"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.","sidebar":"version-3.16/docs"},"introduction/introduction":{"id":"introduction/introduction","title":"Introduction","description":"Clinical-grade variant annotation","sidebar":"version-3.16/docs"},"introduction/parsing-json":{"id":"introduction/parsing-json","title":"Parsing Nirvana JSON","description":"Why JSON?","sidebar":"version-3.16/docs"},"utilities/jasix":{"id":"utilities/jasix","title":"Jasix","description":"Overview","sidebar":"version-3.16/docs"}}}')}}]); \ No newline at end of file diff --git a/assets/js/de45087c.dd01b0e5.js b/assets/js/de45087c.dd01b0e5.js deleted file mode 100644 index cf55261c..00000000 --- a/assets/js/de45087c.dd01b0e5.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2116],{3905:(t,n,e)=>{e.d(n,{Zo:()=>m,kt:()=>k});var a=e(67294);function l(t,n,e){return n in t?Object.defineProperty(t,n,{value:e,enumerable:!0,configurable:!0,writable:!0}):t[n]=e,t}function r(t,n){var e=Object.keys(t);if(Object.getOwnPropertySymbols){var 
a=Object.getOwnPropertySymbols(t);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(t,n).enumerable}))),e.push.apply(e,a)}return e}function o(t){for(var n=1;n=0||(l[e]=t[e]);return l}(t,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,e)&&(l[e]=t[e])}return l}var p=a.createContext({}),u=function(t){var n=a.useContext(p),e=n;return t&&(e="function"==typeof t?t(n):o(o({},n),t)),e},m=function(t){var n=u(t.components);return a.createElement(p.Provider,{value:n},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var n=t.children;return a.createElement(a.Fragment,{},n)}},N=a.forwardRef((function(t,n){var e=t.components,l=t.mdxType,r=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(e),N=l,k=d["".concat(p,".").concat(N)]||d[N]||g[N]||r;return e?a.createElement(k,o(o({ref:n},m),{},{components:e})):a.createElement(k,o({ref:n},m))}));function k(t,n){var e=arguments,l=n&&n.mdxType;if("string"==typeof t||l){var r=e.length,o=new Array(r);o[0]=N;var i={};for(var p in n)hasOwnProperty.call(n,p)&&(i[p]=n[p]);i.originalType=t,i[d]="string"==typeof t?t:l,o[1]=i;for(var u=2;u{e.r(n),e.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>i,toc:()=>p});var a=e(87462),l=(e(67294),e(3905));const r={},o=void 0,i={unversionedId:"data-sources/gnomad-small-variants-json",id:"version-3.14/data-sources/gnomad-small-variants-json",title:"gnomad-small-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.14/data-sources/gnomad-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/gnomad-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/gnomad-small-variants-json.md",tags:[],version:"3.14",frontMatter:{}},p=[],u={toc:p},m="wrapper";function d(t){let{components:n,...e}=t;return(0,l.kt)(m,(0,a.Z)({},u,e,{components:n,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"gnomad":{ \n "coverage":20,\n "allAf":0.190317,\n "maleAf":0.193,\n "femaleAf": 0.1935,\n "afrAf":0.222876,\n "amrAf":0.121394,\n "easAf":0.239802,\n "finAf":0.136833,\n "nfeAf":0.181282,\n "asjAf":0.258278,\n "othAf":0.186094,\n "allAn":30796,\n "maleAn":15096,\n "femaleAn":15700\n "afrAn":8664,\n "amrAn":832,\n "easAn":1618,\n "finAn":3486,\n "nfeAn":14916,\n "asjAn":302,\n "othAn":978,\n "allAc":5861,\n "maleAc":2930,\n "femaleAc": 2931,\n "afrAc":1931,\n "amrAc":101,\n "easAc":388,\n "finAc":477,\n "nfeAc":2704,\n "asjAc":78,\n "othAc":182,\n "allHc":561,\n "afrHc":208,\n "amrHc":6,\n "easHc":42,\n "finHc":31,\n "nfeHc":242,\n "asjHc":13,\n "othHc":19,\n "maleHc":280,\n "femaleHc":281,\n "controlsAllAf":0.190317,\n "controlsAllAn":30796,\n "controlsAllAc":5861,\n "lowComplexityRegion":true,\n 
"failedFilter":true\n}\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Field"),(0,l.kt)("th",{parentName:"tr",align:null},"Type"),(0,l.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"coverage"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"average coverage (non-negative integer values)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the controls subset. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for all populations. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for male population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for female population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the controls subset. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for all populations. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for male population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for female population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"controlsAllAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the controls subset. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"allHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the African / African American population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the African / African American population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the African / African American population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for African / African American population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Latino population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Latino population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Latino population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Latino population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"easHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for East Asian population. Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Finnish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Finnish population. 
Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Finnish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"finHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Finnish population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Non-Finnish European population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Non-Finnish European population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Non-Finnish European population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"nfeHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Non-Finnish European population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Other population. 
Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Other population. Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Other population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"othHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for Other population. Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ashkenazi Jewish population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the Ashkenazi Jewish population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the Ashkenazi Jewish population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"asjHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Ashkenazi Jewish population. 
Non-negative integer")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAf"),(0,l.kt)("td",{parentName:"tr",align:null},"float"),(0,l.kt)("td",{parentName:"tr",align:null},"allele frequency for the South Asian population. Range: 0 - 1.0")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele count for the South Asian population Integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasAn"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"allele number for the South Asian population. Non-zero integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"sasHc"),(0,l.kt)("td",{parentName:"tr",align:null},"int"),(0,l.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the South Asian population. 
Non-negative integer.")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"lowComplexityRegion"),(0,l.kt)("td",{parentName:"tr",align:null},"bool"),(0,l.kt)("td",{parentName:"tr",align:null},"True if this variant is located in a low complexity region.")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/df61b626.1e5ace8a.js b/assets/js/df61b626.1e5ace8a.js deleted file mode 100644 index 47743d7d..00000000 --- a/assets/js/df61b626.1e5ace8a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6776],{3905:(t,e,n)=>{n.d(e,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function i(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var l=r.createContext({}),p=function(t){var e=r.useContext(l),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},s=function(t){var e=p(t.components);return r.createElement(l.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var 
n=t.components,a=t.mdxType,o=t.originalType,l=t.parentName,s=c(t,["components","mdxType","originalType","parentName"]),d=p(n),u=a,f=d["".concat(l,".").concat(u)]||d[u]||m[u]||o;return n?r.createElement(f,i(i({ref:e},s),{},{components:n})):r.createElement(f,i({ref:e},s))}));function f(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=n.length,i=new Array(o);i[0]=u;var c={};for(var l in e)hasOwnProperty.call(e,l)&&(c[l]=e[l]);c.originalType=t,c[d]="string"==typeof t?t:a,i[1]=c;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/splice-ai-json",id:"version-3.18/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/splice-ai-json.md",tags:[],version:"3.18",frontMatter:{}},l=[],p={toc:l},s="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e024c39c.350f5490.js b/assets/js/e024c39c.350f5490.js deleted file mode 100644 index 7f3d24a1..00000000 --- a/assets/js/e024c39c.350f5490.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7958],{3905:(M,L,t)=>{t.d(L,{Zo:()=>o,kt:()=>C});var i=t(67294);function e(M,L,t){return L in M?Object.defineProperty(M,L,{value:t,enumerable:!0,configurable:!0,writable:!0}):M[L]=t,M}function j(M,L){var t=Object.keys(M);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(M);L&&(i=i.filter((function(L){return Object.getOwnPropertyDescriptor(M,L).enumerable}))),t.push.apply(t,i)}return t}function u(M){for(var L=1;L=0||(e[t]=M[t]);return e}(M,L);if(Object.getOwnPropertySymbols){var j=Object.getOwnPropertySymbols(M);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(M,t)&&(e[t]=M[t])}return e}var N=i.createContext({}),n=function(M){var L=i.useContext(N),t=L;return M&&(t="function"==typeof M?M(L):u(u({},L),M)),t},o=function(M){var L=n(M.components);return i.createElement(N.Provider,{value:L},M.children)},s="mdxType",w={inlineCode:"code",wrapper:function(M){var L=M.children;return i.createElement(i.Fragment,{},L)}},y=i.forwardRef((function(M,L){var 
t=M.components,e=M.mdxType,j=M.originalType,N=M.parentName,o=a(M,["components","mdxType","originalType","parentName"]),s=n(t),y=e,C=s["".concat(N,".").concat(y)]||s[y]||w[y]||j;return t?i.createElement(C,u(u({ref:L},o),{},{components:t})):i.createElement(C,u({ref:L},o))}));function C(M,L){var t=arguments,e=L&&L.mdxType;if("string"==typeof M||e){var j=t.length,u=new Array(j);u[0]=y;var a={};for(var N in L)hasOwnProperty.call(L,N)&&(a[N]=L[N]);a.originalType=M,a[s]="string"==typeof M?M:e,u[1]=a;for(var n=2;n{t.d(L,{Z:()=>e});var i=t(67294);function e(M){let{className:L,name:t,children:e,githubUrl:j,twitterUrl:u}=M;return i.createElement("div",{className:L},i.createElement("div",{className:"card card--full-height"},i.createElement("div",{className:"card__header"},i.createElement("div",{className:"avatar avatar--vertical"},i.createElement("img",{className:"avatar__photo avatar__photo--xl",src:j+".png"}),i.createElement("div",{className:"avatar__intro"},i.createElement("h3",{className:"avatar__name"},t)))),i.createElement("div",{className:"card__body"},e),i.createElement("div",{className:"card__footer"},i.createElement("div",{className:"button-group button-group--block"},j&&i.createElement("a",{className:"button button--secondary",href:j},"GitHub"),u&&i.createElement("a",{className:"button button--secondary",href:u},"Twitter")))))}},18481:(M,L,t)=>{t.r(L),t.d(L,{TeamProfileCardCol:()=>o,contentTitle:()=>a,default:()=>y,frontMatter:()=>u,metadata:()=>N,toc:()=>n});var i=t(87462),e=(t(67294),t(3905)),j=t(63427);const u={id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},a=void 0,N={unversionedId:"introduction/introduction",id:"version-3.17/introduction/introduction",title:"Introduction",description:"Clinical-grade variant 
annotation",source:"@site/versioned_docs/version-3.17/introduction/introduction.mdx",sourceDirName:"introduction",slug:"/",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/introduction/introduction.mdx",tags:[],version:"3.17",frontMatter:{id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},sidebar:"version-3.17/docs",next:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/dependencies"}},n=[{value:"What does Nirvana annotate?",id:"what-does-nirvana-annotate",children:[],level:2},{value:"Licensing",id:"licensing",children:[{value:"Code",id:"code",children:[],level:3},{value:"Data",id:"data",children:[],level:3}],level:2},{value:"Nirvana Team",id:"nirvana-team",children:[{value:"Active Team",id:"active-team",children:[],level:3},{value:"Honorary Alumni",id:"honorary-alumni",children:[],level:3}],level:2}];function o(M){return(0,e.kt)(j.Z,(0,i.Z)({},M,{className:"col col--6 margin-bottom--lg",mdxType:"TeamProfileCard"}))}const s={toc:n,TeamProfileCardCol:o},w="wrapper";function y(M){let{components:L,...j}=M;return(0,e.kt)(w,(0,i.Z)({},s,j,{components:L,mdxType:"MDXLayout"}),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(21319).Z})),(0,e.kt)("p",null,"Nirvana provides clinical-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs (including CNVs). It can be run as a stand-alone package, as an AWS Lambda function, or integrated into larger software tools that require variant annotation."),(0,e.kt)("p",null,"The input to Nirvana are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). 
Nirvana handles multiple alternate alleles and multiple samples with ease."),(0,e.kt)("p",null,"The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software with regulatory needs. Nirvana uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily."),(0,e.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,e.kt)("div",{parentName:"div",className:"admonition-heading"},(0,e.kt)("h5",{parentName:"div"},(0,e.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,e.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,e.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Fun Fact")),(0,e.kt)("div",{parentName:"div",className:"admonition-content"},(0,e.kt)("p",{parentName:"div"},"Nirvana is a backronym for ",(0,e.kt)("strong",{parentName:"p"},"NI"),"mble and ",(0,e.kt)("strong",{parentName:"p"},"R"),"obust ",(0,e.kt)("strong",{parentName:"p"},"VA"),"riant a",(0,e.kt)("strong",{parentName:"p"},"N"),"not",(0,e.kt)("strong",{parentName:"p"},"A"),"tor"))),(0,e.kt)("h2",{id:"what-does-nirvana-annotate"},"What does Nirvana annotate?"),(0,e.kt)("p",null,"We use Sequence Ontology consequences to describe how each variant impacts a given transcript:"),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(67476).Z})),(0,e.kt)("p",null,"In addition, we also use external data sources to provide additional context for each 
variant:"),(0,e.kt)("p",null,(0,e.kt)("img",{src:t(44341).Z})),(0,e.kt)("h2",{id:"licensing"},"Licensing"),(0,e.kt)("h3",{id:"code"},"Code"),(0,e.kt)("p",null,"Nirvana source code is provided under the ",(0,e.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/blob/develop/LICENSE"},"GPLv3")," license. Nirvana includes several third party packages provided under other open source licenses, please see ",(0,e.kt)("a",{parentName:"p",href:"introduction/dependencies"},"Dependencies")," for additional details."),(0,e.kt)("h3",{id:"data"},"Data"),(0,e.kt)("p",null,"The data used by Nirvana is publicly available, however some data sources have special restrictions on use by non-academic entities."),(0,e.kt)("h2",{id:"nirvana-team"},"Nirvana Team"),(0,e.kt)("h3",{id:"active-team"},"Active Team"),(0,e.kt)("p",null,"The Nirvana team works on the core functionality, AWS annotation services, in addition to keeping the annotation data sources up-to-date."),(0,e.kt)("p",null,"Current members of the Nirvana team are listed in alphabetical order below."),(0,e.kt)("div",{className:"row"},(0,e.kt)(o,{name:"Joseph Platzer",githubUrl:"https://github.com/jplatzer2",mdxType:"TeamProfileCardCol"},"Test Lead. Joins Nirvana with a history of building sequencing tools and keeping the customer first."),(0,e.kt)(o,{name:"Michael Str\xf6mberg",githubUrl:"https://github.com/MichaelStromberg",mdxType:"TeamProfileCardCol"},"Nirvana founder and now ever grateful Nirvana cheerleader to those who actually write code for it."),(0,e.kt)(o,{name:"Rajat Shuvro Roy",githubUrl:"https://github.com/rajatshuvro",mdxType:"TeamProfileCardCol"},"Lead developer. 
Loves to speed up things and make services available to all interested users.")),(0,e.kt)("h3",{id:"honorary-alumni"},"Honorary Alumni"),(0,e.kt)("p",null,"Nirvana would never be what it is today without the huge contributions from these folks who have moved on to bigger and greater things."),(0,e.kt)("div",{className:"row"},(0,e.kt)(o,{name:"Haochen Li",githubUrl:"https://github.com/haochenl",mdxType:"TeamProfileCardCol"},"Detail-oriented quick thinker that keeps cool even in the most stressful situations. Now working as a Senior Bioinformatics Data Scientist at GRAIL."),(0,e.kt)(o,{name:"Julien Lajugie",githubUrl:"https://github.com/JulienLajugie",mdxType:"TeamProfileCardCol"},"Julien is a legend around these parts. When he's not taking down opponents in Taekwondo or melting riffs in his rock band, he's demolishing bugs and making the world a better place."),(0,e.kt)(o,{name:"Shuli Kang",githubUrl:"https://github.com/shulik7",mdxType:"TeamProfileCardCol"},"Oncology bioinformatician from USC before joining our team at Illumina. Now working as a Senior Bioinformatics Scientist at Novartis Gene Therapies."),(0,e.kt)(o,{name:"Yu Jiang",githubUrl:"https://github.com/yujiang02",mdxType:"TeamProfileCardCol"},"Biostatistics genius from Duke University before joining our team at Illumina. 
Now working as a Research Engineer at Facebook AI Research.")))}y.isMDXComponent=!0},21319:(M,L,t)=>{t.d(L,{Z:()=>i});const i=""},44341:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/SupplementaryAnnotations-d43d3f1c837f9b80fab530432e0e4b1d.svg"},67476:(M,L,t)=>{t.d(L,{Z:()=>i});const i=t.p+"assets/images/TranscriptConsequences-60ca1c43a36dacf896fecdabf09ce02c.svg"}}]); \ No newline at end of file diff --git a/assets/js/e19781ed.2422b716.js b/assets/js/e19781ed.2422b716.js deleted file mode 100644 index fa1b3de9..00000000 --- a/assets/js/e19781ed.2422b716.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6427],{74415:e=>{e.exports=JSON.parse('{"pluginId":"default","version":"3.18","label":"3.18","banner":"unmaintained","badge":true,"className":"docs-version-3.18","isLast":false,"docsSidebars":{"docs":[{"type":"category","label":"Introduction","items":[{"type":"link","label":"Introduction","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/","docId":"introduction/introduction"},{"type":"link","label":"Dependencies","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/dependencies","docId":"introduction/dependencies"},{"type":"link","label":"Getting Started","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/getting-started","docId":"introduction/getting-started"},{"type":"link","label":"Parsing Nirvana JSON","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/parsing-json","docId":"introduction/parsing-json"},{"type":"link","label":"Annotating COVID-19","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/introduction/covid19","docId":"introduction/covid19"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Data Sources","items":[{"type":"link","label":"1000 
Genomes","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/1000Genomes","docId":"data-sources/1000Genomes"},{"type":"link","label":"Amino Acid Conservation","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/amino-acid-conservation","docId":"data-sources/amino-acid-conservation"},{"type":"link","label":"ClinGen","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clingen","docId":"data-sources/clingen"},{"type":"link","label":"ClinVar","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/clinvar","docId":"data-sources/clinvar"},{"type":"link","label":"COSMIC","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/cosmic","docId":"data-sources/cosmic"},{"type":"link","label":"DANN","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dann","docId":"data-sources/dann"},{"type":"link","label":"dbSNP","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dbsnp","docId":"data-sources/dbsnp"},{"type":"link","label":"DECIPHER","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/decipher","docId":"data-sources/decipher"},{"type":"link","label":"FusionCatcher","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/fusioncatcher","docId":"data-sources/fusioncatcher"},{"type":"link","label":"GERP","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gerp","docId":"data-sources/gerp"},{"type":"link","label":"GME Variome","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gme","docId":"data-sources/gme"},{"type":"link","label":"gnomAD","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/gnomad","docId":"data-sources/gnomad"},{"type":"link","label":"Mitochondrial 
Heteroplasmy","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mito-heteroplasmy","docId":"data-sources/mito-heteroplasmy"},{"type":"link","label":"MITOMAP","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mitomap","docId":"data-sources/mitomap"},{"type":"link","label":"OMIM","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/omim","docId":"data-sources/omim"},{"type":"link","label":"PhyloP","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/phylop","docId":"data-sources/phylop"},{"type":"link","label":"Primate AI","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/primate-ai","docId":"data-sources/primate-ai"},{"type":"link","label":"REVEL","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/revel","docId":"data-sources/revel"},{"type":"link","label":"Splice AI","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/splice-ai","docId":"data-sources/splice-ai"},{"type":"link","label":"TOPMed","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/topmed","docId":"data-sources/topmed"}],"collapsible":true,"collapsed":true},{"type":"category","label":"File Formats","items":[{"type":"link","label":"Nirvana JSON File Format","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/file-formats/nirvana-json-file-format","docId":"file-formats/nirvana-json-file-format"},{"type":"link","label":"Custom Annotations","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/file-formats/custom-annotations","docId":"file-formats/custom-annotations"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Core Functionality","items":[{"type":"link","label":"Canonical Transcripts","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/canonical-transcripts","docId":"core-functionality/canonical-transcripts"},{"type":"link","label":"Gene Fusion 
Detection","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/gene-fusions","docId":"core-functionality/gene-fusions"},{"type":"link","label":"MNV Recomposition","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/mnv-recomposition","docId":"core-functionality/mnv-recomposition"},{"type":"link","label":"Variant IDs","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/variant-ids","docId":"core-functionality/variant-ids"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Utilities","items":[{"type":"link","label":"Jasix","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/utilities/jasix","docId":"utilities/jasix"},{"type":"link","label":"SAUtils","href":"/IlluminaConnectedAnnotationsDocumentation/3.18/utilities/sautils","docId":"utilities/sautils"}],"collapsible":true,"collapsed":true}]},"docs":{"core-functionality/canonical-transcripts":{"id":"core-functionality/canonical-transcripts","title":"Canonical Transcripts","description":"Overview","sidebar":"docs"},"core-functionality/gene-fusions":{"id":"core-functionality/gene-fusions","title":"Gene Fusion Detection","description":"Overview","sidebar":"docs"},"core-functionality/mnv-recomposition":{"id":"core-functionality/mnv-recomposition","title":"MNV Recomposition","description":"Overview","sidebar":"docs"},"core-functionality/variant-ids":{"id":"core-functionality/variant-ids","title":"Variant IDs","description":"Overview","sidebar":"docs"},"data-sources/1000Genomes":{"id":"data-sources/1000Genomes","title":"1000 Genomes","description":"Overview","sidebar":"docs"},"data-sources/1000Genomes-snv-json":{"id":"data-sources/1000Genomes-snv-json","title":"1000Genomes-snv-json","description":"| Field | Type | Notes |"},"data-sources/1000Genomes-sv-json":{"id":"data-sources/1000Genomes-sv-json","title":"1000Genomes-sv-json","description":"| Field | Type | Notes 
|"},"data-sources/amino-acid-conservation":{"id":"data-sources/amino-acid-conservation","title":"Amino Acid Conservation","description":"Overview","sidebar":"docs"},"data-sources/amino-acid-conservation-json":{"id":"data-sources/amino-acid-conservation-json","title":"amino-acid-conservation-json","description":"| Field | Type | Notes |"},"data-sources/clingen":{"id":"data-sources/clingen","title":"ClinGen","description":"Overview","sidebar":"docs"},"data-sources/clingen-dosage-json":{"id":"data-sources/clingen-dosage-json","title":"clingen-dosage-json","description":"| Field | Type | Notes |"},"data-sources/clingen-gene-validity-json":{"id":"data-sources/clingen-gene-validity-json","title":"clingen-gene-validity-json","description":"| Field | Type | Notes |"},"data-sources/clingen-json":{"id":"data-sources/clingen-json","title":"clingen-json","description":"| Field | Type | Notes |"},"data-sources/clinvar":{"id":"data-sources/clinvar","title":"ClinVar","description":"Overview","sidebar":"docs"},"data-sources/clinvar-json":{"id":"data-sources/clinvar-json","title":"clinvar-json","description":"small variants:"},"data-sources/cosmic":{"id":"data-sources/cosmic","title":"COSMIC","description":"Overview","sidebar":"docs"},"data-sources/cosmic-json":{"id":"data-sources/cosmic-json","title":"cosmic-json","description":"| Field | Type | Notes |"},"data-sources/dann":{"id":"data-sources/dann","title":"DANN","description":"Overview","sidebar":"docs"},"data-sources/dann-json":{"id":"data-sources/dann-json","title":"dann-json","description":"| Field | Type | Notes |"},"data-sources/dbsnp":{"id":"data-sources/dbsnp","title":"dbSNP","description":"Overview","sidebar":"docs"},"data-sources/dbsnp-json":{"id":"data-sources/dbsnp-json","title":"dbsnp-json","description":"| Field | Type | Notes 
|"},"data-sources/decipher":{"id":"data-sources/decipher","title":"DECIPHER","description":"Overview","sidebar":"docs"},"data-sources/decipher-json":{"id":"data-sources/decipher-json","title":"decipher-json","description":"| Field | Type | Notes |"},"data-sources/fusioncatcher":{"id":"data-sources/fusioncatcher","title":"FusionCatcher","description":"Overview","sidebar":"docs"},"data-sources/fusioncatcher-json":{"id":"data-sources/fusioncatcher-json","title":"fusioncatcher-json","description":"| Field | Type | Notes |"},"data-sources/gerp":{"id":"data-sources/gerp","title":"GERP","description":"Overview","sidebar":"docs"},"data-sources/gerp-json":{"id":"data-sources/gerp-json","title":"gerp-json","description":"| Field | Type | Notes |"},"data-sources/gme":{"id":"data-sources/gme","title":"GME Variome","description":"Overview","sidebar":"docs"},"data-sources/gme-json":{"id":"data-sources/gme-json","title":"gme-json","description":"| Field | Type | Notes |"},"data-sources/gnomad":{"id":"data-sources/gnomad","title":"gnomAD","description":"Overview","sidebar":"docs"},"data-sources/gnomad-lof-json":{"id":"data-sources/gnomad-lof-json","title":"gnomad-lof-json","description":"| Field | Type | Notes |"},"data-sources/gnomad-small-variants-json":{"id":"data-sources/gnomad-small-variants-json","title":"gnomad-small-variants-json","description":"| Field | Type | Notes |"},"data-sources/gnomad-structural-variants-data_description":{"id":"data-sources/gnomad-structural-variants-data_description","title":"gnomad-structural-variants-data_description","description":"Bed Example"},"data-sources/gnomad-structural-variants-json":{"id":"data-sources/gnomad-structural-variants-json","title":"gnomad-structural-variants-json","description":"| Field | Type | Notes |"},"data-sources/mito-heteroplasmy":{"id":"data-sources/mito-heteroplasmy","title":"Mitochondrial 
Heteroplasmy","description":"Overview","sidebar":"docs"},"data-sources/mitomap":{"id":"data-sources/mitomap","title":"MITOMAP","description":"Overview","sidebar":"docs"},"data-sources/mitomap-small-variants-json":{"id":"data-sources/mitomap-small-variants-json","title":"mitomap-small-variants-json","description":"| Field | Type | Notes |"},"data-sources/mitomap-structural-variants-json":{"id":"data-sources/mitomap-structural-variants-json","title":"mitomap-structural-variants-json","description":"| Field | Type | Notes |"},"data-sources/omim":{"id":"data-sources/omim","title":"OMIM","description":"Overview","sidebar":"docs"},"data-sources/omim-json":{"id":"data-sources/omim-json","title":"omim-json","description":"| Field | Type | Notes |"},"data-sources/phylop":{"id":"data-sources/phylop","title":"PhyloP","description":"Overview","sidebar":"docs"},"data-sources/phylop-json":{"id":"data-sources/phylop-json","title":"phylop-json","description":"| Field | Type | Notes |"},"data-sources/primate-ai":{"id":"data-sources/primate-ai","title":"Primate AI","description":"Overview","sidebar":"docs"},"data-sources/primate-ai-json":{"id":"data-sources/primate-ai-json","title":"primate-ai-json","description":"| Field | Type | Notes |"},"data-sources/revel":{"id":"data-sources/revel","title":"REVEL","description":"Overview","sidebar":"docs"},"data-sources/revel-json":{"id":"data-sources/revel-json","title":"revel-json","description":"| Field | Type | Notes |"},"data-sources/splice-ai":{"id":"data-sources/splice-ai","title":"Splice AI","description":"Overview","sidebar":"docs"},"data-sources/splice-ai-json":{"id":"data-sources/splice-ai-json","title":"splice-ai-json","description":"| Field | Type | Notes |"},"data-sources/topmed":{"id":"data-sources/topmed","title":"TOPMed","description":"Overview","sidebar":"docs"},"data-sources/topmed-json":{"id":"data-sources/topmed-json","title":"topmed-json","description":"| Field | Type | Notes 
|"},"file-formats/custom-annotations":{"id":"file-formats/custom-annotations","title":"Custom Annotations","description":"Overview","sidebar":"docs"},"file-formats/nirvana-json-file-format":{"id":"file-formats/nirvana-json-file-format","title":"Nirvana JSON File Format","description":"Overview","sidebar":"docs"},"introduction/covid19":{"id":"introduction/covid19","title":"Annotating COVID-19","description":"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.","sidebar":"docs"},"introduction/dependencies":{"id":"introduction/dependencies","title":"Dependencies","description":"All of the following dependencies have been included in this repository.","sidebar":"docs"},"introduction/getting-started":{"id":"introduction/getting-started","title":"Getting Started","description":"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.","sidebar":"docs"},"introduction/introduction":{"id":"introduction/introduction","title":"Introduction","description":"Clinical-grade variant annotation","sidebar":"docs"},"introduction/parsing-json":{"id":"introduction/parsing-json","title":"Parsing Nirvana JSON","description":"Why JSON?","sidebar":"docs"},"utilities/jasix":{"id":"utilities/jasix","title":"Jasix","description":"Overview","sidebar":"docs"},"utilities/sautils":{"id":"utilities/sautils","title":"SAUtils","description":"Overview","sidebar":"docs"}}}')}}]); \ No newline at end of file diff --git a/assets/js/e1e7c361.1d2eea68.js b/assets/js/e1e7c361.1d2eea68.js deleted file mode 100644 index 44bca96f..00000000 --- a/assets/js/e1e7c361.1d2eea68.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1443],{3905:(n,e,t)=>{t.d(e,{Zo:()=>p,kt:()=>m});var o=t(67294);function i(n,e,t){return e in n?Object.defineProperty(n,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):n[e]=t,n}function r(n,e){var t=Object.keys(n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(n);e&&(o=o.filter((function(e){return Object.getOwnPropertyDescriptor(n,e).enumerable}))),t.push.apply(t,o)}return t}function a(n){for(var e=1;e=0||(i[t]=n[t]);return i}(n,e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(n);for(o=0;o=0||Object.prototype.propertyIsEnumerable.call(n,t)&&(i[t]=n[t])}return i}var c=o.createContext({}),l=function(n){var e=o.useContext(c),t=e;return n&&(t="function"==typeof n?n(e):a(a({},e),n)),t},p=function(n){var e=l(n.components);return o.createElement(c.Provider,{value:e},n.children)},d="mdxType",g={inlineCode:"code",wrapper:function(n){var e=n.children;return o.createElement(o.Fragment,{},e)}},u=o.forwardRef((function(n,e){var 
t=n.components,i=n.mdxType,r=n.originalType,c=n.parentName,p=s(n,["components","mdxType","originalType","parentName"]),d=l(t),u=i,m=d["".concat(c,".").concat(u)]||d[u]||g[u]||r;return t?o.createElement(m,a(a({ref:e},p),{},{components:t})):o.createElement(m,a({ref:e},p))}));function m(n,e){var t=arguments,i=e&&e.mdxType;if("string"==typeof n||i){var r=t.length,a=new Array(r);a[0]=u;var s={};for(var c in e)hasOwnProperty.call(e,c)&&(s[c]=e[c]);s.originalType=n,s[d]="string"==typeof n?n:i,a[1]=s;for(var l=2;l{t.r(e),t.d(e,{contentTitle:()=>a,default:()=>d,frontMatter:()=>r,metadata:()=>s,toc:()=>c});var o=t(87462),i=(t(67294),t(3905));const r={title:"Parsing Illumina Connected Annotations JSON"},a=void 0,s={unversionedId:"introduction/parsing-json",id:"introduction/parsing-json",title:"Parsing Illumina Connected Annotations JSON",description:"Parsing JSON",source:"@site/docs/introduction/parsing-json.md",sourceDirName:"introduction",slug:"/introduction/parsing-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/parsing-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/introduction/parsing-json.md",tags:[],version:"current",frontMatter:{title:"Parsing Illumina Connected Annotations JSON"}},c=[{value:"Parsing JSON",id:"parsing-json",children:[{value:"Organization",id:"organization",children:[],level:3},{value:"JASIX",id:"jasix",children:[],level:3}],level:2}],l={toc:c},p="wrapper";function d(n){let{components:e,...r}=n;return(0,i.kt)(p,(0,o.Z)({},l,r,{components:e,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"parsing-json"},"Parsing JSON"),(0,i.kt)("p",null,"Our JSON files are organized similarly to original VCF variants:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(66410).Z})),(0,i.kt)("p",null,"Illumina Connected Annotations JSON files can get very large and sometimes we receive feedback that a bioinformatician tried to read the JSON file into Python or R resulting in a program that ran out of available 
RAM. This happens because those parsers try to load everything into memory all at once."),(0,i.kt)("p",null,"To get around those issues, we play some clever tricks with newlines that enables our users to parse our JSON files quickly and efficiently."),(0,i.kt)("h3",{id:"organization"},"Organization"),(0,i.kt)("p",null,"Our JSON file is arranged as follows:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the header section is located on the first line"),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a position (same as a row in a VCF file)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the genes section ",(0,i.kt)("inlineCode",{parentName:"li"},'],"genes":[')))),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a gene",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the end ",(0,i.kt)("inlineCode",{parentName:"li"},"]}"))))),(0,i.kt)("p",null,"Knowing this, you can load each position line as an independent JSON object and extract the information you need. 
"),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Jupyter Notebook")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"To demonstrate this, we have put together a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/blob/master/static/files/parse-nirvana-json-python.ipynb"},"Jupyter notebook demonstrating how to do this in Python")," and a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/blob/master/static/files/parse-nirvana-json-r.ipynb"},"R version")," as well."))),(0,i.kt)("h3",{id:"jasix"},"JASIX"),(0,i.kt)("p",null,"One of the tools that we really like in the VCF ecosystem is ",(0,i.kt)("a",{parentName:"p",href:"https://dx.doi.org/10.1093%2Fbioinformatics%2Fbtq671"},"tabix"),". Unfortunately, tabix only works for tab-delimited file formats. 
As a result, we created a similar tool for Illumina Connected Annotations JSON files called JASIX."),(0,i.kt)("p",null,"Here's an example of how you might use JASIX:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/net6.0/Jasix.dll -i dragen.json.gz -q chr1:942450-942455\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the Illumina Connected Annotations JSON path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-q")," argument specifies a genomic range ",(0,i.kt)("em",{parentName:"li"},"(you can use as many of these as you want)"))),(0,i.kt)("p",null,"JASIX also includes additional options for showing the Illumina Connected Annotations header or for extracting different sections (like the genes section)."),(0,i.kt)("p",null,"The output from JASIX is compliant JSON object shown in pretty-printed form:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{"positions":[\n{\n "chromosome": "chr1",\n "position": 942451,\n "refAllele": "T",\n "altAlleles": [\n "C"\n ],\n "quality": 484.23,\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "1p36.33",\n "samples": [\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 21,\n "genotypeQuality": 60,\n "alleleDepths": [\n 0,\n 21\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 32,\n "genotypeQuality": 93,\n "alleleDepths": [\n 0,\n 32\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 36,\n "genotypeQuality": 105,\n "alleleDepths": [\n 0,\n 36\n ]\n }\n ],\n "variants": [\n {\n "vid": "1-942451-T-C",\n "chromosome": "chr1",\n "begin": 942451,\n "end": 942451,\n "refAllele": "T",\n "altAllele": "C",\n "variantType": "SNV",\n "hgvsg": "NC_000001.11:g.942451T>C",\n "phylopScore": -0.1,\n "clinvar": [\n {\n "id": "VCV000836156.1",\n 
"reviewStatus": "criteria provided, single submitter",\n "significance": [\n "uncertain significance"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "lastUpdatedDate": "2020-08-20"\n },\n {\n "id": "RCV001037211.1",\n "variationId": 836156,\n "reviewStatus": "criteria provided, single submitter",\n "alleleOrigins": [\n "germline"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "phenotypes": [\n "not provided"\n ],\n "medGenIds": [\n "CN517202"\n ],\n "significance": [\n "uncertain significance"\n ],\n "lastUpdatedDate": "2020-08-20",\n "pubMedIds": [\n "28492532"\n ]\n }\n ],\n "dbsnp": [\n "rs6672356"\n ],\n "gnomad": {\n "coverage": 25,\n "allAf": 0.999855,\n "allAn": 123742,\n "allAc": 123724,\n "allHc": 61853,\n "afrAf": 0.999416,\n "afrAn": 10278,\n "afrAc": 10272,\n "afrHc": 5133,\n "amrAf": 0.99995,\n "amrAn": 20008,\n "amrAc": 20007,\n "amrHc": 10003,\n "easAf": 1,\n "easAn": 6054,\n "easAc": 6054,\n "easHc": 3027,\n "finAf": 1,\n "finAn": 8696,\n "finAc": 8696,\n "finHc": 4348,\n "nfeAf": 0.999899,\n "nfeAn": 49590,\n "nfeAc": 49585,\n "nfeHc": 24790,\n "asjAf": 1,\n "asjAn": 7208,\n "asjAc": 7208,\n "asjHc": 3604,\n "sasAf": 0.99967,\n "sasAn": 18160,\n "sasAc": 18154,\n "sasHc": 9074,\n "othAf": 1,\n "othAn": 3748,\n "othAc": 3748,\n "othHc": 1874,\n "maleAf": 0.9999,\n "maleAn": 69780,\n "maleAc": 69773,\n "maleHc": 34883,\n "femaleAf": 0.999796,\n "femaleAn": 53962,\n "femaleAc": 53951,\n "femaleHc": 26970,\n "controlsAllAf": 0.999815,\n "controlsAllAn": 48654,\n "controlsAllAc": 48645\n },\n "oneKg": {\n "allAf": 1,\n "afrAf": 1,\n "amrAf": 1,\n "easAf": 1,\n "eurAf": 1,\n "sasAf": 1,\n "allAn": 5008,\n "afrAn": 1322,\n "amrAn": 694,\n "easAn": 1008,\n "eurAn": 1006,\n "sasAn": 978,\n "allAc": 5008,\n "afrAc": 1322,\n "amrAc": 694,\n "easAc": 1008,\n "eurAc": 1006,\n "sasAc": 978\n },\n "primateAI": [\n {\n "hgnc": "SAMD11",\n "scorePercentile": 0.87\n }\n ],\n "revel": {\n "score": 0.145\n },\n "topmed": {\n "allAf": 0.999809,\n "allAn": 125568,\n 
"allAc": 125544,\n "allHc": 62760\n },\n "transcripts": [\n {\n "transcript": "ENST00000420190.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ],\n "proteinId": "ENSP00000411579.2"\n },\n {\n "transcript": "ENST00000342066.7",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000342066.7:c.1027T>C",\n "hgvsp": "ENSP00000342313.3:p.(Trp343Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000342313.3",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000618181.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "732",\n "cdsPos": "652",\n "exons": "7/11",\n "proteinPos": "218",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618181.4:c.652T>C",\n "hgvsp": "ENSP00000480870.1:p.(Trp218Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000480870.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000622503.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1030",\n "exons": "10/14",\n "proteinPos": "344",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000622503.4:c.1030T>C",\n "hgvsp": "ENSP00000482138.1:p.(Trp344Arg)",\n "isCanonical": true,\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482138.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n 
"transcript": "ENST00000618323.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "712",\n "cdsPos": "632",\n "exons": "8/12",\n "proteinPos": "211",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618323.4:c.632T>C",\n "hgvsp": "ENSP00000480678.1:p.(Leu211Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000480678.1",\n "siftScore": 0.03,\n "siftPrediction": "deleterious - low confidence"\n },\n {\n "transcript": "ENST00000616016.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "ccT/ccC",\n "aminoAcids": "P",\n "cdnaPos": "944",\n "cdsPos": "864",\n "exons": "9/13",\n "proteinPos": "288",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "ENST00000616016.4:c.864T>C",\n "hgvsp": "ENST00000616016.4:c.864T>C(p.(Pro288=))",\n "proteinId": "ENSP00000478421.1"\n },\n {\n "transcript": "ENST00000618779.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "921",\n "cdsPos": "841",\n "exons": "9/13",\n "proteinPos": "281",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618779.4:c.841T>C",\n "hgvsp": "ENSP00000484256.1:p.(Trp281Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000484256.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000616125.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "783",\n "cdsPos": "703",\n "exons": "8/12",\n "proteinPos": "235",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000616125.4:c.703T>C",\n "hgvsp": "ENSP00000484643.1:p.(Trp235Arg)",\n "polyPhenScore": 
0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000484643.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000620200.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "427",\n "cdsPos": "347",\n "exons": "5/9",\n "proteinPos": "116",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000620200.4:c.347T>C",\n "hgvsp": "ENSP00000484820.1:p.(Leu116Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000484820.1",\n "siftScore": 0.16,\n "siftPrediction": "tolerated - low confidence"\n },\n {\n "transcript": "ENST00000617307.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "867",\n "cdsPos": "787",\n "exons": "9/13",\n "proteinPos": "263",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000617307.4:c.787T>C",\n "hgvsp": "ENSP00000482090.1:p.(Trp263Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482090.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "NM_152486.2",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "codons": "Cgg/Cgg",\n "aminoAcids": "R",\n "cdnaPos": "1107",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "148398",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "NM_152486.2:c.1027T>C",\n "hgvsp": "NM_152486.2:c.1027T>C(p.(Arg343=))",\n "isCanonical": true,\n "proteinId": "NP_689699.2"\n },\n {\n "transcript": "ENST00000341065.8",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "750",\n "cdsPos": "751",\n "exons": "8/12",\n "proteinPos": "251",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n 
"missense_variant"\n ],\n "hgvsc": "ENST00000341065.8:c.750T>C",\n "hgvsp": "ENSP00000349216.4:p.(Trp251Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000349216.4",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000455979.1",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "507",\n "cdsPos": "508",\n "exons": "4/7",\n "proteinPos": "170",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000455979.1:c.507T>C",\n "hgvsp": "ENSP00000412228.1:p.(Trp170Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000412228.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000478729.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000474461.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "389",\n "exons": "3/4",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000474461.1:n.389T>C"\n },\n {\n "transcript": "ENST00000466827.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "191",\n "exons": "2/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000466827.1:n.191T>C"\n },\n {\n "transcript": "ENST00000464948.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "286",\n "exons": "1/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000464948.1:n.286T>C"\n },\n {\n "transcript": "NM_015658.3",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "geneId": 
"26155",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "NP_056473.2"\n },\n {\n "transcript": "ENST00000483767.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000327044.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000317992.6"\n },\n {\n "transcript": "ENST00000477976.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000496938.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n }\n ]\n }\n ]\n}\n]}\n')))}d.isMDXComponent=!0},66410:(n,e,t)=>{t.d(e,{Z:()=>o});const o=t.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/e1e7c361.2eaea79b.js b/assets/js/e1e7c361.2eaea79b.js new file mode 100644 index 00000000..fca5cf95 --- /dev/null +++ b/assets/js/e1e7c361.2eaea79b.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1443],{3905:(n,e,t)=>{t.d(e,{Zo:()=>p,kt:()=>m});var o=t(7294);function i(n,e,t){return e in n?Object.defineProperty(n,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):n[e]=t,n}function r(n,e){var t=Object.keys(n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(n);e&&(o=o.filter((function(e){return Object.getOwnPropertyDescriptor(n,e).enumerable}))),t.push.apply(t,o)}return t}function a(n){for(var e=1;e=0||(i[t]=n[t]);return i}(n,e);if(Object.getOwnPropertySymbols){var 
r=Object.getOwnPropertySymbols(n);for(o=0;o=0||Object.prototype.propertyIsEnumerable.call(n,t)&&(i[t]=n[t])}return i}var c=o.createContext({}),l=function(n){var e=o.useContext(c),t=e;return n&&(t="function"==typeof n?n(e):a(a({},e),n)),t},p=function(n){var e=l(n.components);return o.createElement(c.Provider,{value:e},n.children)},d="mdxType",g={inlineCode:"code",wrapper:function(n){var e=n.children;return o.createElement(o.Fragment,{},e)}},u=o.forwardRef((function(n,e){var t=n.components,i=n.mdxType,r=n.originalType,c=n.parentName,p=s(n,["components","mdxType","originalType","parentName"]),d=l(t),u=i,m=d["".concat(c,".").concat(u)]||d[u]||g[u]||r;return t?o.createElement(m,a(a({ref:e},p),{},{components:t})):o.createElement(m,a({ref:e},p))}));function m(n,e){var t=arguments,i=e&&e.mdxType;if("string"==typeof n||i){var r=t.length,a=new Array(r);a[0]=u;var s={};for(var c in e)hasOwnProperty.call(e,c)&&(s[c]=e[c]);s.originalType=n,s[d]="string"==typeof n?n:i,a[1]=s;for(var l=2;l{t.r(e),t.d(e,{contentTitle:()=>a,default:()=>d,frontMatter:()=>r,metadata:()=>s,toc:()=>c});var o=t(7462),i=(t(7294),t(3905));const r={title:"Parsing Illumina Connected Annotations JSON"},a=void 0,s={unversionedId:"introduction/parsing-json",id:"introduction/parsing-json",title:"Parsing Illumina Connected Annotations JSON",description:"Parsing JSON",source:"@site/docs/introduction/parsing-json.md",sourceDirName:"introduction",slug:"/introduction/parsing-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/parsing-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/introduction/parsing-json.md",tags:[],version:"current",frontMatter:{title:"Parsing Illumina Connected Annotations JSON"}},c=[{value:"Parsing JSON",id:"parsing-json",children:[{value:"Organization",id:"organization",children:[],level:3},{value:"JASIX",id:"jasix",children:[],level:3}],level:2}],l={toc:c},p="wrapper";function 
d(n){let{components:e,...r}=n;return(0,i.kt)(p,(0,o.Z)({},l,r,{components:e,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"parsing-json"},"Parsing JSON"),(0,i.kt)("p",null,"Our JSON files are organized similarly to original VCF variants:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(6837).Z})),(0,i.kt)("p",null,"Illumina Connected Annotations JSON files can get very large and sometimes we receive feedback that a bioinformatician tried to read the JSON file into Python or R resulting in a program that ran out of available RAM. This happens because those parsers try to load everything into memory all at once."),(0,i.kt)("p",null,"To get around those issues, we play some clever tricks with newlines that enables our users to parse our JSON files quickly and efficiently."),(0,i.kt)("h3",{id:"organization"},"Organization"),(0,i.kt)("p",null,"Our JSON file is arranged as follows:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the header section is located on the first line"),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a position (same as a row in a VCF file)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the genes section ",(0,i.kt)("inlineCode",{parentName:"li"},'],"genes":[')))),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a gene",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the end ",(0,i.kt)("inlineCode",{parentName:"li"},"]}"))))),(0,i.kt)("p",null,"Knowing this, you can load each position line as an independent JSON object and extract the information you need. 
"),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Jupyter Notebook")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"To demonstrate this, we have put together a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/blob/master/static/files/parse-nirvana-json-python.ipynb"},"Jupyter notebook demonstrating how to do this in Python")," and a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/blob/master/static/files/parse-nirvana-json-r.ipynb"},"R version")," as well."))),(0,i.kt)("h3",{id:"jasix"},"JASIX"),(0,i.kt)("p",null,"One of the tools that we really like in the VCF ecosystem is ",(0,i.kt)("a",{parentName:"p",href:"https://dx.doi.org/10.1093%2Fbioinformatics%2Fbtq671"},"tabix"),". Unfortunately, tabix only works for tab-delimited file formats. 
As a result, we created a similar tool for Illumina Connected Annotations JSON files called JASIX."),(0,i.kt)("p",null,"Here's an example of how you might use JASIX:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/net6.0/Jasix.dll -i dragen.json.gz -q chr1:942450-942455\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the Illumina Connected Annotations JSON path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-q")," argument specifies a genomic range ",(0,i.kt)("em",{parentName:"li"},"(you can use as many of these as you want)"))),(0,i.kt)("p",null,"JASIX also includes additional options for showing the Illumina Connected Annotations header or for extracting different sections (like the genes section)."),(0,i.kt)("p",null,"The output from JASIX is compliant JSON object shown in pretty-printed form:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{"positions":[\n{\n "chromosome": "chr1",\n "position": 942451,\n "refAllele": "T",\n "altAlleles": [\n "C"\n ],\n "quality": 484.23,\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "1p36.33",\n "samples": [\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 21,\n "genotypeQuality": 60,\n "alleleDepths": [\n 0,\n 21\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 32,\n "genotypeQuality": 93,\n "alleleDepths": [\n 0,\n 32\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 36,\n "genotypeQuality": 105,\n "alleleDepths": [\n 0,\n 36\n ]\n }\n ],\n "variants": [\n {\n "vid": "1-942451-T-C",\n "chromosome": "chr1",\n "begin": 942451,\n "end": 942451,\n "refAllele": "T",\n "altAllele": "C",\n "variantType": "SNV",\n "hgvsg": "NC_000001.11:g.942451T>C",\n "phylopScore": -0.1,\n "clinvar": [\n {\n "id": "VCV000836156.1",\n 
"reviewStatus": "criteria provided, single submitter",\n "significance": [\n "uncertain significance"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "lastUpdatedDate": "2020-08-20"\n },\n {\n "id": "RCV001037211.1",\n "variationId": 836156,\n "reviewStatus": "criteria provided, single submitter",\n "alleleOrigins": [\n "germline"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "phenotypes": [\n "not provided"\n ],\n "medGenIds": [\n "CN517202"\n ],\n "significance": [\n "uncertain significance"\n ],\n "lastUpdatedDate": "2020-08-20",\n "pubMedIds": [\n "28492532"\n ]\n }\n ],\n "dbsnp": [\n "rs6672356"\n ],\n "gnomad": {\n "coverage": 25,\n "allAf": 0.999855,\n "allAn": 123742,\n "allAc": 123724,\n "allHc": 61853,\n "afrAf": 0.999416,\n "afrAn": 10278,\n "afrAc": 10272,\n "afrHc": 5133,\n "amrAf": 0.99995,\n "amrAn": 20008,\n "amrAc": 20007,\n "amrHc": 10003,\n "easAf": 1,\n "easAn": 6054,\n "easAc": 6054,\n "easHc": 3027,\n "finAf": 1,\n "finAn": 8696,\n "finAc": 8696,\n "finHc": 4348,\n "nfeAf": 0.999899,\n "nfeAn": 49590,\n "nfeAc": 49585,\n "nfeHc": 24790,\n "asjAf": 1,\n "asjAn": 7208,\n "asjAc": 7208,\n "asjHc": 3604,\n "sasAf": 0.99967,\n "sasAn": 18160,\n "sasAc": 18154,\n "sasHc": 9074,\n "othAf": 1,\n "othAn": 3748,\n "othAc": 3748,\n "othHc": 1874,\n "maleAf": 0.9999,\n "maleAn": 69780,\n "maleAc": 69773,\n "maleHc": 34883,\n "femaleAf": 0.999796,\n "femaleAn": 53962,\n "femaleAc": 53951,\n "femaleHc": 26970,\n "controlsAllAf": 0.999815,\n "controlsAllAn": 48654,\n "controlsAllAc": 48645\n },\n "oneKg": {\n "allAf": 1,\n "afrAf": 1,\n "amrAf": 1,\n "easAf": 1,\n "eurAf": 1,\n "sasAf": 1,\n "allAn": 5008,\n "afrAn": 1322,\n "amrAn": 694,\n "easAn": 1008,\n "eurAn": 1006,\n "sasAn": 978,\n "allAc": 5008,\n "afrAc": 1322,\n "amrAc": 694,\n "easAc": 1008,\n "eurAc": 1006,\n "sasAc": 978\n },\n "primateAI": [\n {\n "hgnc": "SAMD11",\n "scorePercentile": 0.87\n }\n ],\n "revel": {\n "score": 0.145\n },\n "topmed": {\n "allAf": 0.999809,\n "allAn": 125568,\n 
"allAc": 125544,\n "allHc": 62760\n },\n "transcripts": [\n {\n "transcript": "ENST00000420190.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ],\n "proteinId": "ENSP00000411579.2"\n },\n {\n "transcript": "ENST00000342066.7",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000342066.7:c.1027T>C",\n "hgvsp": "ENSP00000342313.3:p.(Trp343Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000342313.3",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000618181.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "732",\n "cdsPos": "652",\n "exons": "7/11",\n "proteinPos": "218",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618181.4:c.652T>C",\n "hgvsp": "ENSP00000480870.1:p.(Trp218Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000480870.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000622503.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1030",\n "exons": "10/14",\n "proteinPos": "344",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000622503.4:c.1030T>C",\n "hgvsp": "ENSP00000482138.1:p.(Trp344Arg)",\n "isCanonical": true,\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482138.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n 
"transcript": "ENST00000618323.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "712",\n "cdsPos": "632",\n "exons": "8/12",\n "proteinPos": "211",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618323.4:c.632T>C",\n "hgvsp": "ENSP00000480678.1:p.(Leu211Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000480678.1",\n "siftScore": 0.03,\n "siftPrediction": "deleterious - low confidence"\n },\n {\n "transcript": "ENST00000616016.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "ccT/ccC",\n "aminoAcids": "P",\n "cdnaPos": "944",\n "cdsPos": "864",\n "exons": "9/13",\n "proteinPos": "288",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "ENST00000616016.4:c.864T>C",\n "hgvsp": "ENST00000616016.4:c.864T>C(p.(Pro288=))",\n "proteinId": "ENSP00000478421.1"\n },\n {\n "transcript": "ENST00000618779.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "921",\n "cdsPos": "841",\n "exons": "9/13",\n "proteinPos": "281",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618779.4:c.841T>C",\n "hgvsp": "ENSP00000484256.1:p.(Trp281Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000484256.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000616125.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "783",\n "cdsPos": "703",\n "exons": "8/12",\n "proteinPos": "235",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000616125.4:c.703T>C",\n "hgvsp": "ENSP00000484643.1:p.(Trp235Arg)",\n "polyPhenScore": 
0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000484643.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000620200.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "427",\n "cdsPos": "347",\n "exons": "5/9",\n "proteinPos": "116",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000620200.4:c.347T>C",\n "hgvsp": "ENSP00000484820.1:p.(Leu116Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000484820.1",\n "siftScore": 0.16,\n "siftPrediction": "tolerated - low confidence"\n },\n {\n "transcript": "ENST00000617307.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "867",\n "cdsPos": "787",\n "exons": "9/13",\n "proteinPos": "263",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000617307.4:c.787T>C",\n "hgvsp": "ENSP00000482090.1:p.(Trp263Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482090.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "NM_152486.2",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "codons": "Cgg/Cgg",\n "aminoAcids": "R",\n "cdnaPos": "1107",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "148398",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "NM_152486.2:c.1027T>C",\n "hgvsp": "NM_152486.2:c.1027T>C(p.(Arg343=))",\n "isCanonical": true,\n "proteinId": "NP_689699.2"\n },\n {\n "transcript": "ENST00000341065.8",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "750",\n "cdsPos": "751",\n "exons": "8/12",\n "proteinPos": "251",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n 
"missense_variant"\n ],\n "hgvsc": "ENST00000341065.8:c.750T>C",\n "hgvsp": "ENSP00000349216.4:p.(Trp251Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000349216.4",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000455979.1",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "507",\n "cdsPos": "508",\n "exons": "4/7",\n "proteinPos": "170",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000455979.1:c.507T>C",\n "hgvsp": "ENSP00000412228.1:p.(Trp170Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000412228.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000478729.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000474461.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "389",\n "exons": "3/4",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000474461.1:n.389T>C"\n },\n {\n "transcript": "ENST00000466827.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "191",\n "exons": "2/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000466827.1:n.191T>C"\n },\n {\n "transcript": "ENST00000464948.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "286",\n "exons": "1/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000464948.1:n.286T>C"\n },\n {\n "transcript": "NM_015658.3",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "geneId": 
"26155",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "NP_056473.2"\n },\n {\n "transcript": "ENST00000483767.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000327044.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000317992.6"\n },\n {\n "transcript": "ENST00000477976.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000496938.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n }\n ]\n }\n ]\n}\n]}\n')))}d.isMDXComponent=!0},6837:(n,e,t)=>{t.d(e,{Z:()=>o});const o=t.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/e1f9a248.86f120a1.js b/assets/js/e1f9a248.86f120a1.js deleted file mode 100644 index f9b9f9d2..00000000 --- a/assets/js/e1f9a248.86f120a1.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1063],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var 
o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var c=r.createContext({}),s=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=s(e.components);return r.createElement(c.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},g=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,c=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),d=s(n),g=a,u=d["".concat(c,".").concat(g)]||d[g]||m[g]||o;return n?r.createElement(u,l(l({ref:t},p),{},{components:n})):r.createElement(u,l({ref:t},p))}));function u(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=g;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[d]="string"==typeof e?e:a,l[1]=i;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>o,metadata:()=>i,toc:()=>c});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,i={unversionedId:"data-sources/fusioncatcher-json",id:"version-3.17/data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/fusioncatcher-json.md",tags:[],version:"3.17",frontMatter:{}},c=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],s={toc:c},p="wrapper";function 
d(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA oesophageal carcinomas"\n ]\n }\n ]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,a.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known germline data sources")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data 
sources")))),(0,a.kt)("h4",{id:"genes"},"genes"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"first"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"second"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are paralogs for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a readthrough event (both are on the same strand and there are no genes between 
them)")))),(0,a.kt)("h4",{id:"gene"},"gene"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e20ab9db.54f83c7d.js b/assets/js/e20ab9db.54f83c7d.js deleted file mode 100644 index e94d0b89..00000000 --- a/assets/js/e20ab9db.54f83c7d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6403],{3905:(t,e,n)=>{n.d(e,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function i(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var l=r.createContext({}),p=function(t){var e=r.useContext(l),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},s=function(t){var e=p(t.components);return 
r.createElement(l.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,o=t.originalType,l=t.parentName,s=c(t,["components","mdxType","originalType","parentName"]),d=p(n),u=a,f=d["".concat(l,".").concat(u)]||d[u]||m[u]||o;return n?r.createElement(f,i(i({ref:e},s),{},{components:n})):r.createElement(f,i({ref:e},s))}));function f(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=n.length,i=new Array(o);i[0]=u;var c={};for(var l in e)hasOwnProperty.call(e,l)&&(c[l]=e[l]);c.originalType=t,c[d]="string"==typeof t?t:a,i[1]=c;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/splice-ai-json",id:"version-3.21/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/splice-ai-json.md",tags:[],version:"3.21",frontMatter:{}},l=[],p={toc:l},s="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e286457f.af93973e.js b/assets/js/e286457f.af93973e.js new file mode 100644 index 00000000..9ed4fcde --- /dev/null +++ b/assets/js/e286457f.af93973e.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4773],{3905:(t,e,a)=>{a.d(e,{Zo:()=>p,kt:()=>g});var n=a(7294);function l(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function i(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function r(t){for(var e=1;e=0||(l[a]=t[a]);return l}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(l[a]=t[a])}return l}var s=n.createContext({}),m=function(t){var e=n.useContext(s),a=e;return t&&(a="function"==typeof t?t(e):r(r({},e),t)),a},p=function(t){var e=m(t.components);return n.createElement(s.Provider,{value:e},t.children)},d="mdxType",k={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},c=n.forwardRef((function(t,e){var a=t.components,l=t.mdxType,i=t.originalType,s=t.parentName,p=o(t,["components","mdxType","originalType","parentName"]),d=m(a),c=l,g=d["".concat(s,".").concat(c)]||d[c]||k[c]||i;return 
a?n.createElement(g,r(r({ref:e},p),{},{components:a})):n.createElement(g,r({ref:e},p))}));function g(t,e){var a=arguments,l=e&&e.mdxType;if("string"==typeof t||l){var i=a.length,r=new Array(i);r[0]=c;var o={};for(var s in e)hasOwnProperty.call(e,s)&&(o[s]=e[s]);o.originalType=t,o[d]="string"==typeof t?t:l,r[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>r,default:()=>d,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var n=a(7462),l=(a(7294),a(3905));const i={title:"Custom Annotations"},r=void 0,o={unversionedId:"file-formats/custom-annotations",id:"file-formats/custom-annotations",title:"Custom Annotations",description:"Overview",source:"@site/docs/file-formats/custom-annotations.md",sourceDirName:"file-formats",slug:"/file-formats/custom-annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/file-formats/custom-annotations.md",tags:[],version:"current",frontMatter:{title:"Custom Annotations"},sidebar:"docs",previous:{title:"Illumina Connected Annotations JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format"},next:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Variant File Format",id:"variant-file-format",children:[{value:"Basic Allele Frequency Example",id:"basic-allele-frequency-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv",children:[],level:4},{value:"Convert to Illumina Connected Annotations Format",id:"convert-to-illumina-connected-annotations-format",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations",children:[],level:4},{value:"Investigate the 
Results",id:"investigate-the-results",children:[],level:4}],level:3},{value:"Categories & Descriptions Example",id:"categories--descriptions-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-1",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations-1",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-1",children:[],level:4},{value:"Using Positional Matches",id:"using-positional-matches",children:[],level:4}],level:3},{value:"Genomic Region Example",id:"genomic-region-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-2",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations-2",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-2",children:[],level:4}],level:3},{value:"Genomic Regions for Structural Variants Example",id:"genomic-regions-for-structural-variants-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-3",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations-3",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-3",children:[],level:4}],level:3},{value:"Mixing Small Variants and Genomic Regions",id:"mixing-small-variants-and-genomic-regions",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-4",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations-4",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-4",children:[],level:4}],level:3}],level:2},{value:"Gene File Format",id:"gene-file-format",children:[{value:"Basic Gene Example",id:"basic-gene-example",children:[{value:"Create the Custom 
Annotation TSV",id:"create-the-custom-annotation-tsv-5",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations-5",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-5",children:[],level:4}],level:3}],level:2},{value:"Customizing the Header",id:"customizing-the-header",children:[{value:"Title",id:"title",children:[],level:3},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:3},{value:"Matching Criteria",id:"matching-criteria",children:[],level:3},{value:"Categories",id:"categories",children:[],level:3},{value:"Descriptions",id:"descriptions",children:[{value:"Populations",id:"populations",children:[],level:4}],level:3},{value:"Data Types",id:"data-types",children:[],level:3}],level:2},{value:"Using SAUtils",id:"using-sautils",children:[{value:"Convert Variant File",id:"convert-variant-file",children:[],level:3},{value:"Convert Gene File",id:"convert-gene-file",children:[],level:3}],level:2}],m={toc:s},p="wrapper";function d(t){let{components:e,...a}=t;return(0,l.kt)(p,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"While the team tries to keep data sources up-to-date, you might want to start incorporate new annotations ahead of our update cycle. Another\ncommon use case involves protected health information (PHI). Custom annotations are a mechanism that enables both use cases."),(0,l.kt)("p",null,"Here are some examples of how our collaborators use custom annotations:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"associating context from both a sample-level and a sample cohort level with the variant annotations"),(0,l.kt)("li",{parentName:"ul"},"adding content that is licensed (e.g. HGMD) to the variant annotations")),(0,l.kt)("p",null,"At the moment, we have two different custom annotation file formats. 
One provides additional annotations to variants (both small variants and SVs)\nwhile the other caters to gene annotations."),(0,l.kt)("p",null,"In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data."),(0,l.kt)("p",null,"The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how\nIllumina Connected Annotations should match the variants."),(0,l.kt)("p",null,"At Illumina, there are usually many components downstream of Illumina Connected Annotations that have to parse our annotations. If a customer provides a custom\nannotation, those downstream tools need to understand more about the data such as:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"data type (e.g. number, boolean, or a string)"),(0,l.kt)("li",{parentName:"ul"},"data category (e.g. is this an allele count, allele number, allele frequency, etc.)"),(0,l.kt)("li",{parentName:"ul"},"associated population (i.e. if this is an allele frequency)")),(0,l.kt)("p",null,"For each custom annotation, Illumina Connected Annotations uses this context to create a ",(0,l.kt)("a",{parentName:"p",href:"https://json-schema.org/"},"JSON schema")," that can be sent to downstream tools. 
If\na tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of ","[0, 1]","."),(0,l.kt)("h2",{id:"variant-file-format"},"Variant File Format"),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"File Format")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Illumina Connected Annotations expects plain text (or gzipped text) files. Using tools like Excel can add extra characters that can break parsing. We highly recommend creating and modifying these files with plain text editor like Notepad, Notepad++ or Atom."))),(0,l.kt)("h3",{id:"basic-allele-frequency-example"},"Basic Allele Frequency Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Imagine that you want to create a basic allele frequency custom annotation for small variants. 
If we visualized the tab-delimited file\n(TSV), it would look something like this:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency")),(0,l.kt)("tr",{parentName:"tbody"}
,(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over the header and discuss the contents:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"title")," indicates the name of the JSON key"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"assembly")," indicates that this data is only 
valid for ",(0,l.kt)("inlineCode",{parentName:"li"},"GRCh38"),"."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"matchVariantsBy")," indicates how annotations should be matched and reported. In this case annotations will be matched and reported by allele."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"categories")," provides hints to downstream tools on how they might want to treat the data. In this case, we indicate that it's an allele frequency."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"descriptions")," are used in special circumstances to provide more context. Even though column 5 is called ",(0,l.kt)("inlineCode",{parentName:"li"},"allAf"),", it might not be clear to a\ndownstream tool that this means a global allele frequency using all sub-populations. In this case, ",(0,l.kt)("inlineCode",{parentName:"li"},"ALL")," indicates the intended population."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"type")," indicates to downstream tools the data type. 
Since allele frequencies are numbers, we'll write ",(0,l.kt)("inlineCode",{parentName:"li"},"number")," in this column.")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Reference Base Checking")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Illumina Connected Annotations validates all the reference bases in a custom annotation. If a variant or genomic region is specified that has the wrong reference base, an error will be produced."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"The variants within each chromosome must be sorted by genomic 
position."))),(0,l.kt)("h4",{id:"convert-to-illumina-connected-annotations-format"},"Convert to Illumina Connected Annotations Format"),(0,l.kt)("p",null,"First we need to convert the TSV file to Illumina Connected Annotations's native file format and let's put that file in a new directory called CA:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"$ mkdir CA\n$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA\n---------------------------------------------------------------------------\nSAUtils (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nChromosome 16 completed in 00:00:00.1\nChromosome 19 completed in 00:00:00.0\n\nTime: 00:00:00.2\n")),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's annotate the following VCF (notice that it's one of the variants that we have in our custom annotation):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 68801894 . G A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,"Since Illumina Connected Annotations can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to\nthe normal Illumina Connected Annotations command-line."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash",metastring:"{3}","{3}":!0},"$ dotnet Annotator.dll -c Data/Cache/GRCh38/Both \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \\\n --sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA\n---------------------------------------------------------------------------\nIlluminaConnectedAnnotations (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.8\nSA Position Scan 00:00:00.0 19\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr16 00:00:00.2 00:00:01.3 1\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:01.9 25.5 %\nPreload 00:00:00.2 3.3 %\nAnnotation 00:00:01.3 18.2 %\n\nTime: 00:00:06.3\n")),(0,l.kt)("h4",{id:"investigate-the-results"},"Investigate the Results"),(0,l.kt)("p",null,"We would expect the following data to show up in our JSON output file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-16}","{12-16}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n 
"phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"Illumina Connected Annotations preserves up to 6 decimal places for allele frequency data."),(0,l.kt)("h3",{id:"categories--descriptions-example"},"Categories & Descriptions Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-1"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Building on the previous example, we can add other types of annotations like predictions and general notes."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 6"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 
7"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,l.kt)("td",{parentName:"tr",align:"left"},"pathogenicity"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr"
,align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579"),(0,l.kt)("td",{parentName:"tr",align:"left"},"P"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569"),(0,l.kt)("td",{parentName:"tr",align:"left"},"LP"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in case 
123")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource2.tsv"},"the full TSV file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Placeholders")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). 
While\nIllumina Connected Annotations also accepts empty columns in the TSV file, we use them in these examples to promote readability."))),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 6")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"pathogenicity")," which uses the ",(0,l.kt)("inlineCode",{parentName:"li"},"Prediction")," category. When using this category, Illumina Connected Annotations will\nvalidate to make\nsure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic)."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 7")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes")," and it doesn't have a category or description. We're just going to use it to add some internal\nnotes.")),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations-1"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the\nalternate allele (allele-specific match):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 23603511 . TG T . . .\n16 68801894 . G A . . .\n19 11107436 . G C . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA2.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-1"},"Investigate the Results"),(0,l.kt)("p",null,"Because we specified ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," in our custom annotation file, only the middle variant will get an annotation:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-18}","{12-18}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123"\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA2.json.gz"},"the full JSON file"),"."),(0,l.kt)("h4",{id:"using-positional-matches"},"Using Positional Matches"),(0,l.kt)("p",null,"What would happen if we changed to ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position"),"? Two things will happen. 
First, our positional variants will now match:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-17}","{12-17}":!0},' "variants": [\n {\n "vid": "16-23603511-TG-T",\n "chromosome": "16",\n "begin": 23603512,\n "end": 23603512,\n "refAllele": "G",\n "altAllele": "-",\n "variantType": "deletion",\n "hgvsg": "NC_000016.10:g.23603512delG",\n "MyDataSource": [\n {\n "refAllele": "GA",\n "altAllele": "-",\n "allAf": 7e-06,\n "pathogenicity": "P"\n }\n ],\n "clinvar": [\n')),(0,l.kt)("p",null,"In addition, you will now see an extra flag for our allele-specific variant:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-20}","{12-20}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": [\n {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123",\n "isAlleleSpecific": true\n }\n ],\n "clinvar": [\n')),(0,l.kt)("h3",{id:"genomic-region-example"},"Genomic Region Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-2"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. 
Consider the following example:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0
,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"20000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"70000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Lots of false positives in this region")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource3.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes"),". In essence, it looks exactly like column 7 from our previous example."),(0,l.kt)("li",{parentName:"ul"},"The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.")),(0,l.kt)("p",null,"In the previous example we learned about positional matching vs allele-specific matching. 
For genomic regions, ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position")," produce\nthe same result."),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations-2"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's use the same VCF file as our previous example."),(0,l.kt)("h4",{id:"investigate-the-results-2"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 23603511,\n "refAllele": "TG",\n "altAlleles": [\n "T"\n ],\n "cytogeneticBand": "16p12.2",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA3.json.gz"},"the full JSON file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Reciprocal & Annotation 
Overlap")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For all intervals, Illumina Connected Annotations internally calculates two overlaps: a ",(0,l.kt)("strong",{parentName:"p"},"variant overlap")," and an ",(0,l.kt)("strong",{parentName:"p"},"annotation overlap"),". Variant overlap is the percentage of the variant's length that is\noverlapped. Annotation overlap is the percentage of the annotation's length that is overlap."),(0,l.kt)("p",{parentName:"div"},(0,l.kt)("strong",{parentName:"p"},"Reciprocal overlap")," is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is one 1 bp, both overlaps will be pretty close to 0."))),(0,l.kt)("p",null,"We will also see this annotation for the other variant on chr16:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 68801894,\n "refAllele": "G",\n "altAlleles": [\n "A"\n ],\n "cytogeneticBand": "16q22.1",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("h3",{id:"genomic-regions-for-structural-variants-example"},"Genomic Regions for Structural Variants Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-3"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To force Illumina Connected Annotations to match regions only to other SVs, use the ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=sv")," option in the header. 
Here is an example:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=sv"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{par
entName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"20000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"70000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Lots of false positives in this region")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource6.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"The main difference is the header field ",(0,l.kt)("inlineCode",{parentName:"li"},"#matchVariantsBy=sv")," which indicates that only structural variants that overlap these genomic regions will receive annotations.")),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations-3"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's use a new VCF file. It contains the first variant from the previous file and a structural variant deletion- both of which overlap the given genomic region."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 23603511 . TG T . . .\n16 68801894 . G . . 
END=73683789;SVTYPE=DEL\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA6.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-3"},"Investigate the Results"),(0,l.kt)("p",null,"Note that this time, ",(0,l.kt)("inlineCode",{parentName:"p"},"MyDataSource")," only showed up for the ",(0,l.kt)("inlineCode",{parentName:"p"},"")," and not the deletion ",(0,l.kt)("inlineCode",{parentName:"p"},"16-23603511-TG-T"),"."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{21-29}","{21-29}":!0},' {\n "chromosome": "16",\n "position": 23603511,\n "refAllele": "TG",\n "altAlleles": [\n "T"\n ],\n "cytogeneticBand": "16p12.2",\n "variants": [\n ...\n ...\n {\n "chromosome": "16",\n "position": 68801894,\n "svEnd": 73683789,\n "refAllele": "G",\n "altAlleles": [\n ""\n ],\n "cytogeneticBand": "16q22.1-q22.3",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0.02396,\n "annotationOverlap": 0.02396\n }\n ],\n "variants": [\n\n')),(0,l.kt)("h3",{id:"mixing-small-variants-and-genomic-regions"},"Mixing Small Variants and Genomic Regions"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-4"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions. 
Let's create a file that contains both:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 6"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."
),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:
"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval #1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"<","DEL",">"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval #2")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr22"),(0,l.kt)("td",{parentName:"tr",align:"left"},"12370388"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T[chr22:12370729["),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"Known false-positive")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource4.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 4")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"REF")," field. Exception for the case listed below, this is only used by small variants or translocation breakends."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"END")," field. This is only used by genomic regions."),(0,l.kt)("li",{parentName:"ul"},"There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? 
Interval #2 has ",(0,l.kt)("strong",{parentName:"li"},"a symbolic allele in the ALT column"),". When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). When Illumina Connected Annotations matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.")),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations-4"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's use a new VCF file to study how matching works for intervals #1 and #2:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n21 10510818 . C . . END=10699435;SVTYPE=DUP\n22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA3.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,'The first variant is similar to the custom annotation labelled "interval #2". 
Position 10510818 is the padding base, so it effectively starts at position 10510819.'),(0,l.kt)("h4",{id:"investigate-the-results-4"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-26}","{11-26}":!0},' "positions": [\n {\n "chromosome": "21",\n "position": 10510818,\n "svEnd": 10699435,\n "refAllele": "C",\n "altAlleles": [\n ""\n ],\n "cytogeneticBand": "21p11.2",\n "MyDataSource": [\n {\n "start": 10510818,\n "end": 10699435,\n "notes": "Interval #1",\n "reciprocalOverlap": 0.99999,\n "annotationOverlap": 0.99999\n },\n {\n "start": 10510819,\n "end": 10699435,\n "notes": "Interval #2",\n "reciprocalOverlap": 1,\n "annotationOverlap": 1\n }\n ],\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA4.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. 
Interval #1 technically starts 1 bp earlier, so its overlap 99.9%."),(0,l.kt)("p",null,"Further down the JSON file, we find the annotated translocation breakend:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-15}","{11-15}":!0},' "variants": [\n {\n "vid": "22-12370388-T-T[chr22:12370729[",\n "chromosome": "22",\n "begin": 12370388,\n "end": 12370388,\n "isStructuralVariant": true,\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "variantType": "translocation_breakend",\n "MyDataSource": {\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "notes": "Known false-positive"\n }\n }\n')),(0,l.kt)("h2",{id:"gene-file-format"},"Gene File Format"),(0,l.kt)("h3",{id:"basic-gene-example"},"Basic Gene Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-5"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions, however, sometimes we would like to add custom gene annotations. 
The gene custom annotation file format\nlooks slightly different:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#geneSymbol"),(0,l.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,l.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TP53"),(0,l.kt)("td",{parentName:"tr",align:"left"},"7157"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colorectal cancer, hereditary nonpolyposis, type 
5"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KRAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ENSG00000133703"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mismatch repair cancer syndrome"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in cohort 123")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource5.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 2")," has the ",(0,l.kt)("inlineCode",{parentName:"li"},"geneId")," field. This can be either an ",(0,l.kt)("strong",{parentName:"li"},"Entrez Gene ID")," or an ",(0,l.kt)("strong",{parentName:"li"},"Ensembl ID"),".")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Gene Symbols")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Illumina Connected Annotations uses the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneId")," to match genes rather than the gene symbol. 
However, to\nmake the custom annotation files easier to read, we've included the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneSymbol")," column as well."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unknown Gene IDs")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"When Illumina Connected Annotations parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Illumina Connected Annotations. In such a case, Illumina Connected Annotations will display an error showing all the\nunrecognized gene IDs."))),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations-5"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's use a VCF file that contain variants in TP53 and KRAS:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n12 25227255 . A T . . .\n17 7675074 . C A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA4.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-5"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{24-27}","{24-27}":!0},' "genes": [\n {\n "name": "KRAS",\n "clingenGeneValidity": [\n {\n "diseaseId": "MONDO_0009026",\n "disease": "Costello syndrome",\n "classification": "disputed",\n "classificationDate": "2018-07-24"\n }\n ],\n "clingenDosageSensitivityMap": {\n "haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"\n },\n "gnomAD": {\n "pLi": 0.000788,\n "pRec": 0.789,\n "pNull": 0.21,\n "synZ": 0.336,\n "misZ": 2.32,\n "loeuf": 1.24\n },\n "MyDataSource": {\n "phenotype": "Mismatch repair cancer syndrome",\n "notes": "Seen in cohort 123"\n }\n },\n')),(0,l.kt)("p",null,"This is the abbreviated output for KRAS. Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA5.json.gz"},"the full JSON file")," if you want to see the complete KRAS entry."),(0,l.kt)("h2",{id:"customizing-the-header"},"Customizing the Header"),(0,l.kt)("h3",{id:"title"},"Title"),(0,l.kt)("p",null,"For the title, you can provide any string that hasn't already been used. 
The title should be unique."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Make sure that the title does not conflict with other keys in the JSON file."))),(0,l.kt)("p",null,"For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"vid"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"transcripts"),", etc.. The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clinvar")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"gnomad"),"."),(0,l.kt)("p",null,"For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"svLength"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"cytogeneticBand"),", etc. 
The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clingen")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"dgv"),"."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Care should be taken not to annotate using multiple custom annotations that all use the same title."))),(0,l.kt)("h3",{id:"genome-assemblies"},"Genome Assemblies"),(0,l.kt)("p",null,"The following genome assemblies can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"GRCh37"),(0,l.kt)("li",{parentName:"ul"},"GRCh38")),(0,l.kt)("h3",{id:"matching-criteria"},"Matching Criteria"),(0,l.kt)("p",null,"The matching criteria instructs how Illumina Connected Annotations should match a VCF variant to the custom annotation."),(0,l.kt)("p",null,"The following matching criteria can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"allele")," - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"gnomAD")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"position")," - use this when you want positional matches. 
This is commonly used with disease phenotype data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"ClinVar")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"sv")," - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline\ncopy number intervals along the genome.")),(0,l.kt)("h3",{id:"categories"},"Categories"),(0,l.kt)("p",null,"Categories are not used by Illumina Connected Annotations, but are often used by downstream tools. Categories provide hints for how those tools should filter or display\nthe annotation data."),(0,l.kt)("p",null,"When a category is specified, Illumina Connected Annotations will provide additional validation for those fields. The following table describes each category:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Category"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Validation"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele counts for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleNumber"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele numbers for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele frequencies for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations 
below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ACMG-style pathogenicity classifications"),(0,l.kt)("td",{parentName:"tr",align:"left"},"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"benign")," (B)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely benign")," (LB)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"VUS"),(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely pathogenic")," (LP)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"pathogenic")," (P)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free text that signals downstream tools to add the column to the filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 20 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free-text description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 100 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Identifier"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any ID"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 50 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"HomozygousCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"count of homozygous individuals for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Score"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any score value"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Any double-precision floating point 
number")))),(0,l.kt)("h3",{id:"descriptions"},"Descriptions"),(0,l.kt)("p",null,"Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations."),(0,l.kt)("h4",{id:"populations"},"Populations"),(0,l.kt)("p",null,"The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Super-population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ACB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African Caribbeans in Barbados")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"All populations")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ad Mixed American")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASJ"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ashkenazi Jewish")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASW"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Americans of 
African Ancestry in SW USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"BEB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Bengali from Bangladesh")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CDX"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Chinese Dai in Xishuangbanna, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CEU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Utah Residents (CEPH) with Northern and Western European Ancestry")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Han Chinese in Beijing, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Southern Han Chinese")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CLM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colombians from Medellin, Colombia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"East Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ESN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Esan in 
Nigeria")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"FIN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Finnish in Finland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GBR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"British in England and Scotland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GIH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gujarati Indian from Houston, Texas")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GWD"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gambian in Western Divisions in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"IBS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Iberian population in Spain")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ITU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Indian Telugu from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"JPT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Japanese in Tokyo, Japan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KHV"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Kinh in Ho Chi Minh City, 
Vietnam")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"LWK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Luhya in Webuye, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MAG"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mandinka in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MKK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Maasai in Kinyawa, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MSL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mende in Sierra Leone")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MXL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mexican Ancestry from Los Angeles, USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"NFE"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European (Non-Finnish)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Other")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PEL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Peruvians from Lima, Peru")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PJL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Punjabi from Lahore, 
Pakistan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Puerto Ricans from Puerto Rico")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"South Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"STU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Sri Lankan Tamil from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TSI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Toscani in Italia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"YRI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Yoruba in Ibadan, Nigeria")))),(0,l.kt)("h3",{id:"data-types"},"Data Types"),(0,l.kt)("p",null,"Each custom annotation can be one of the following data types:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"bool")," - true or false"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"number")," - any integer or floating-point number"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"string")," - text")),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 
1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For boolean variables, only keys with a ",(0,l.kt)("inlineCode",{parentName:"p"},"true")," value will be output to the JSON object."))),(0,l.kt)("h2",{id:"using-sautils"},"Using SAUtils"),(0,l.kt)("p",null,"Illumina Connected Annotations includes a tool called ",(0,l.kt)("inlineCode",{parentName:"p"},"SAUtils")," that converts various data sources into Illumina Connected Annotations's native binary format. The sub-commands ",(0,l.kt)("inlineCode",{parentName:"p"},"customvar")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"customgene")," are used to specify a variant file or a gene file respectively."),(0,l.kt)("h3",{id:"convert-variant-file"},"Convert Variant File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,l.kt)("h3",{id:"convert-gene-file"},"Convert Gene 
File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll customgene \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -c Data/Cache \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the Illumina Connected Annotations cache path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e286457f.e3aa814f.js b/assets/js/e286457f.e3aa814f.js deleted file mode 100644 index 8ab04b64..00000000 --- a/assets/js/e286457f.e3aa814f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4773],{3905:(t,e,a)=>{a.d(e,{Zo:()=>p,kt:()=>g});var n=a(67294);function l(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function i(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function r(t){for(var e=1;e=0||(l[a]=t[a]);return l}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(l[a]=t[a])}return l}var s=n.createContext({}),m=function(t){var e=n.useContext(s),a=e;return t&&(a="function"==typeof t?t(e):r(r({},e),t)),a},p=function(t){var e=m(t.components);return n.createElement(s.Provider,{value:e},t.children)},d="mdxType",k={inlineCode:"code",wrapper:function(t){var e=t.children;return 
n.createElement(n.Fragment,{},e)}},c=n.forwardRef((function(t,e){var a=t.components,l=t.mdxType,i=t.originalType,s=t.parentName,p=o(t,["components","mdxType","originalType","parentName"]),d=m(a),c=l,g=d["".concat(s,".").concat(c)]||d[c]||k[c]||i;return a?n.createElement(g,r(r({ref:e},p),{},{components:a})):n.createElement(g,r({ref:e},p))}));function g(t,e){var a=arguments,l=e&&e.mdxType;if("string"==typeof t||l){var i=a.length,r=new Array(i);r[0]=c;var o={};for(var s in e)hasOwnProperty.call(e,s)&&(o[s]=e[s]);o.originalType=t,o[d]="string"==typeof t?t:l,r[1]=o;for(var m=2;m{a.r(e),a.d(e,{contentTitle:()=>r,default:()=>d,frontMatter:()=>i,metadata:()=>o,toc:()=>s});var n=a(87462),l=(a(67294),a(3905));const i={title:"Custom Annotations"},r=void 0,o={unversionedId:"file-formats/custom-annotations",id:"file-formats/custom-annotations",title:"Custom Annotations",description:"Overview",source:"@site/docs/file-formats/custom-annotations.md",sourceDirName:"file-formats",slug:"/file-formats/custom-annotations",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/file-formats/custom-annotations.md",tags:[],version:"current",frontMatter:{title:"Custom Annotations"},sidebar:"docs",previous:{title:"Illumina Connected Annotations JSON File Format",permalink:"/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format"},next:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Variant File Format",id:"variant-file-format",children:[{value:"Basic Allele Frequency Example",id:"basic-allele-frequency-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv",children:[],level:4},{value:"Convert to Illumina Connected Annotations 
Format",id:"convert-to-illumina-connected-annotations-format",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results",children:[],level:4}],level:3},{value:"Categories & Descriptions Example",id:"categories--descriptions-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-1",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations-1",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-1",children:[],level:4},{value:"Using Positional Matches",id:"using-positional-matches",children:[],level:4}],level:3},{value:"Genomic Region Example",id:"genomic-region-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-2",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations-2",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-2",children:[],level:4}],level:3},{value:"Genomic Regions for Structural Variants Example",id:"genomic-regions-for-structural-variants-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-3",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations-3",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-3",children:[],level:4}],level:3},{value:"Mixing Small Variants and Genomic Regions",id:"mixing-small-variants-and-genomic-regions",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-4",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations-4",children:[],level:4},{value:"Investigate 
the Results",id:"investigate-the-results-4",children:[],level:4}],level:3}],level:2},{value:"Gene File Format",id:"gene-file-format",children:[{value:"Basic Gene Example",id:"basic-gene-example",children:[{value:"Create the Custom Annotation TSV",id:"create-the-custom-annotation-tsv-5",children:[],level:4},{value:"Annotate with Illumina Connected Annotations",id:"annotate-with-illumina-connected-annotations-5",children:[],level:4},{value:"Investigate the Results",id:"investigate-the-results-5",children:[],level:4}],level:3}],level:2},{value:"Customizing the Header",id:"customizing-the-header",children:[{value:"Title",id:"title",children:[],level:3},{value:"Genome Assemblies",id:"genome-assemblies",children:[],level:3},{value:"Matching Criteria",id:"matching-criteria",children:[],level:3},{value:"Categories",id:"categories",children:[],level:3},{value:"Descriptions",id:"descriptions",children:[{value:"Populations",id:"populations",children:[],level:4}],level:3},{value:"Data Types",id:"data-types",children:[],level:3}],level:2},{value:"Using SAUtils",id:"using-sautils",children:[{value:"Convert Variant File",id:"convert-variant-file",children:[],level:3},{value:"Convert Gene File",id:"convert-gene-file",children:[],level:3}],level:2}],m={toc:s},p="wrapper";function d(t){let{components:e,...a}=t;return(0,l.kt)(p,(0,n.Z)({},m,a,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"While the team tries to keep data sources up-to-date, you might want to start incorporate new annotations ahead of our update cycle. Another\ncommon use case involves protected health information (PHI). 
Custom annotations are a mechanism that enables both use cases."),(0,l.kt)("p",null,"Here are some examples of how our collaborators use custom annotations:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"associating context from both a sample-level and a sample cohort level with the variant annotations"),(0,l.kt)("li",{parentName:"ul"},"adding content that is licensed (e.g. HGMD) to the variant annotations")),(0,l.kt)("p",null,"At the moment, we have two different custom annotation file formats. One provides additional annotations to variants (both small variants and SVs)\nwhile the other caters to gene annotations."),(0,l.kt)("p",null,"In both cases, the custom annotation file format is a tab-delimited file that is separated into two parts: the header & the data."),(0,l.kt)("p",null,"The header is where you can customize how you want the data to appear in the JSON file and provide context about the genome assembly and how\nIllumina Connected Annotations should match the variants."),(0,l.kt)("p",null,"At Illumina, there are usually many components downstream of Illumina Connected Annotations that have to parse our annotations. If a customer provides a custom\nannotation, those downstream tools need to understand more about the data such as:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"data type (e.g. number, boolean, or a string)"),(0,l.kt)("li",{parentName:"ul"},"data category (e.g. is this an allele count, allele number, allele frequency, etc.)"),(0,l.kt)("li",{parentName:"ul"},"associated population (i.e. if this is an allele frequency)")),(0,l.kt)("p",null,"For each custom annotation, Illumina Connected Annotations uses this context to create a ",(0,l.kt)("a",{parentName:"p",href:"https://json-schema.org/"},"JSON schema")," that can be sent to downstream tools. 
If\na tool knows that this is an allele frequency, it can validate user input to ensure that it's in the range of ","[0, 1]","."),(0,l.kt)("h2",{id:"variant-file-format"},"Variant File Format"),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"File Format")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Illumina Connected Annotations expects plain text (or gzipped text) files. Using tools like Excel can add extra characters that can break parsing. We highly recommend creating and modifying these files with plain text editor like Notepad, Notepad++ or Atom."))),(0,l.kt)("h3",{id:"basic-allele-frequency-example"},"Basic Allele Frequency Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Imagine that you want to create a basic allele frequency custom annotation for small variants. 
If we visualized the tab-delimited file\n(TSV), it would look something like this:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency")),(0,l.kt)("tr",{parentName:"tbody"}
,(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over the header and discuss the contents:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"title")," indicates the name of the JSON key"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"assembly")," indicates that this data is only 
valid for ",(0,l.kt)("inlineCode",{parentName:"li"},"GRCh38"),"."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"matchVariantsBy")," indicates how annotations should be matched and reported. In this case annotations will be matched and reported by allele."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"categories")," provides hints to downstream tools on how they might want to treat the data. In this case, we indicate that it's an allele frequency."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"descriptions")," are used in special circumstances to provide more context. Even though column 5 is called ",(0,l.kt)("inlineCode",{parentName:"li"},"allAf"),", it might not be clear to a\ndownstream tool that this means a global allele frequency using all sub-populations. In this case, ",(0,l.kt)("inlineCode",{parentName:"li"},"ALL")," indicates the intended population."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"type")," indicates to downstream tools the data type. 
Since allele frequencies are numbers, we'll write ",(0,l.kt)("inlineCode",{parentName:"li"},"number")," in this column.")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Reference Base Checking")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Illumina Connected Annotations validates all the reference bases in a custom annotation. If a variant or genomic region is specified that has the wrong reference base, an error will be produced."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Sorting")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"The variants within each chromosome must be sorted by genomic 
position."))),(0,l.kt)("h4",{id:"convert-to-illumina-connected-annotations-format"},"Convert to Illumina Connected Annotations Format"),(0,l.kt)("p",null,"First we need to convert the TSV file to Illumina Connected Annotations's native file format and let's put that file in a new directory called CA:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"$ mkdir CA\n$ dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat -i MyDataSource.tsv -o CA\n---------------------------------------------------------------------------\nSAUtils (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nChromosome 16 completed in 00:00:00.1\nChromosome 19 completed in 00:00:00.0\n\nTime: 00:00:00.2\n")),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's annotate the following VCF (notice that it's one of the variants that we have in our custom annotation):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 68801894 . G A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,"Since Illumina Connected Annotations can handle multiple directories with external annotations, all we need to do is specify our new CA directory in addition to\nthe normal Illumina Connected Annotations command-line."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash",metastring:"{3}","{3}":!0},"$ dotnet Annotator.dll -c Data/Cache/GRCh38/Both \\\n -r Data/References/Homo_sapiens.GRCh38.Nirvana.dat \\\n --sd Data/SupplementaryAnnotation/GRCh38 --sd CA -i TestCA.vcf -o TestCA\n---------------------------------------------------------------------------\nIlluminaConnectedAnnotations (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.8\nSA Position Scan 00:00:00.0 19\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr16 00:00:00.2 00:00:01.3 1\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:01.9 25.5 %\nPreload 00:00:00.2 3.3 %\nAnnotation 00:00:01.3 18.2 %\n\nTime: 00:00:06.3\n")),(0,l.kt)("h4",{id:"investigate-the-results"},"Investigate the Results"),(0,l.kt)("p",null,"We would expect the following data to show up in our JSON output file:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-16}","{12-16}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n 
"phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"Illumina Connected Annotations preserves up to 6 decimal places for allele frequency data."),(0,l.kt)("h3",{id:"categories--descriptions-example"},"Categories & Descriptions Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-1"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Building on the previous example, we can add other types of annotations like predictions and general notes."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 6"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 
7"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allAf"),(0,l.kt)("td",{parentName:"tr",align:"left"},"pathogenicity"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr"
,align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"number"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006579"),(0,l.kt)("td",{parentName:"tr",align:"left"},"P"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.000006569"),(0,l.kt)("td",{parentName:"tr",align:"left"},"LP"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in case 
123")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"0.00003291"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource2.tsv"},"the full TSV file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Placeholders")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"You can use a period to denote an empty value (much in the same way as periods are used in VCF files to signify missing values). 
While\nIllumina Connected Annotations also accepts empty columns in the TSV file, we use them in these examples to promote readability."))),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 6")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"pathogenicity")," which uses the ",(0,l.kt)("inlineCode",{parentName:"li"},"Prediction")," category. When using this category, Illumina Connected Annotations will\nvalidate to make\nsure that the field contains either the abbreviations (B, LB, VUS, LP, and P) or the long-form equivalents (e.g. benign or pathogenic)."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 7")," adds a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes")," and it doesn't have a category or description. We're just going to use it to add some internal\nnotes.")),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations-1"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's use a new VCF file. It includes all the same positions as our custom annotation file, but only the middle variant also matches the\nalternate allele (allele-specific match):"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 23603511 . TG T . . .\n16 68801894 . G A . . .\n19 11107436 . G C . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA2.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-1"},"Investigate the Results"),(0,l.kt)("p",null,"Because we specified ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," in our custom annotation file, only the middle variant will get an annotation:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-18}","{12-18}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123"\n },\n "clinvar": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA2.json.gz"},"the full JSON file"),"."),(0,l.kt)("h4",{id:"using-positional-matches"},"Using Positional Matches"),(0,l.kt)("p",null,"What would happen if we changed to ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position"),"? Two things will happen. 
First, our positional variants will now match:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-17}","{12-17}":!0},' "variants": [\n {\n "vid": "16-23603511-TG-T",\n "chromosome": "16",\n "begin": 23603512,\n "end": 23603512,\n "refAllele": "G",\n "altAllele": "-",\n "variantType": "deletion",\n "hgvsg": "NC_000016.10:g.23603512delG",\n "MyDataSource": [\n {\n "refAllele": "GA",\n "altAllele": "-",\n "allAf": 7e-06,\n "pathogenicity": "P"\n }\n ],\n "clinvar": [\n')),(0,l.kt)("p",null,"In addition, you will now see an extra flag for our allele-specific variant:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{12-20}","{12-20}":!0},' "variants": [\n {\n "vid": "16-68801894-G-A",\n "chromosome": "16",\n "begin": 68801894,\n "end": 68801894,\n "refAllele": "G",\n "altAllele": "A",\n "variantType": "SNV",\n "hgvsg": "NC_000016.10:g.68801894G>A",\n "phylopScore": 1,\n "MyDataSource": [\n {\n "refAllele": "G",\n "altAllele": "A",\n "allAf": 7e-06,\n "pathogenicity": "LP",\n "notes": "Seen in case 123",\n "isAlleleSpecific": true\n }\n ],\n "clinvar": [\n')),(0,l.kt)("h3",{id:"genomic-region-example"},"Genomic Region Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-2"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"In the previous example, we added a note for the middle variant, but sometimes it's handy to annotate a genomic region. 
Consider the following example:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0
,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"20000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"70000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Lots of false positives in this region")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource3.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has a field called ",(0,l.kt)("inlineCode",{parentName:"li"},"notes"),". In essence, it looks exactly like column 7 from our previous example."),(0,l.kt)("li",{parentName:"ul"},"The main difference is that now one of our custom annotation entries is actually a genomic region. Any variant that overlaps with that region will get a custom annotation.")),(0,l.kt)("p",null,"In the previous example we learned about positional matching vs allele-specific matching. 
For genomic regions, ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=allele")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=position")," produce\nthe same result."),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations-2"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's use the same VCF file as our previous example."),(0,l.kt)("h4",{id:"investigate-the-results-2"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 23603511,\n "refAllele": "TG",\n "altAlleles": [\n "T"\n ],\n "cytogeneticBand": "16p12.2",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA3.json.gz"},"the full JSON file"),"."),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Reciprocal & Annotation 
Overlap")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For all intervals, Illumina Connected Annotations internally calculates two overlaps: a ",(0,l.kt)("strong",{parentName:"p"},"variant overlap")," and an ",(0,l.kt)("strong",{parentName:"p"},"annotation overlap"),". Variant overlap is the percentage of the variant's length that is\noverlapped. Annotation overlap is the percentage of the annotation's length that is overlap."),(0,l.kt)("p",{parentName:"div"},(0,l.kt)("strong",{parentName:"p"},"Reciprocal overlap")," is the minimum of those two overlaps. Given that the annotation is 50 Mbp and the deletion is one 1 bp, both overlaps will be pretty close to 0."))),(0,l.kt)("p",null,"We will also see this annotation for the other variant on chr16:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{9-17}","{9-17}":!0},' {\n "chromosome": "16",\n "position": 68801894,\n "refAllele": "G",\n "altAlleles": [\n "A"\n ],\n "cytogeneticBand": "16q22.1",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0,\n "annotationOverlap": 0\n }\n ],\n "variants": [\n')),(0,l.kt)("h3",{id:"genomic-regions-for-structural-variants-example"},"Genomic Regions for Structural Variants Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-3"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Often we use genomic regions to represent other known CNVs and SVs in the genome. In this use case, we usually don't want to match these regions to other small variants. To force Illumina Connected Annotations to match regions only to other SVs, use the ",(0,l.kt)("inlineCode",{parentName:"p"},"#matchVariantsBy=sv")," option in the header. 
Here is an example:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=sv"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{par
entName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"20000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"70000000"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Lots of false positives in this region")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource6.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"The main difference is the header field ",(0,l.kt)("inlineCode",{parentName:"li"},"#matchVariantsBy=sv")," which indicates that only structural variants that overlap these genomic regions will receive annotations.")),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations-3"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's use a new VCF file. It contains the first variant from the previous file and a structural variant deletion- both of which overlap the given genomic region."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n16 23603511 . TG T . . .\n16 68801894 . G . . 
END=73683789;SVTYPE=DEL\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA6.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-3"},"Investigate the Results"),(0,l.kt)("p",null,"Note that this time, ",(0,l.kt)("inlineCode",{parentName:"p"},"MyDataSource")," only showed up for the ",(0,l.kt)("inlineCode",{parentName:"p"},"")," and not the deletion ",(0,l.kt)("inlineCode",{parentName:"p"},"16-23603511-TG-T"),"."),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{21-29}","{21-29}":!0},' {\n "chromosome": "16",\n "position": 23603511,\n "refAllele": "TG",\n "altAlleles": [\n "T"\n ],\n "cytogeneticBand": "16p12.2",\n "variants": [\n ...\n ...\n {\n "chromosome": "16",\n "position": 68801894,\n "svEnd": 73683789,\n "refAllele": "G",\n "altAlleles": [\n ""\n ],\n "cytogeneticBand": "16q22.1-q22.3",\n "MyDataSource": [\n {\n "start": 20000000,\n "end": 70000000,\n "notes": "Lots of false positives in this region",\n "reciprocalOverlap": 0.02396,\n "annotationOverlap": 0.02396\n }\n ],\n "variants": [\n\n')),(0,l.kt)("h3",{id:"mixing-small-variants-and-genomic-regions"},"Mixing Small Variants and Genomic Regions"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-4"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions. 
Let's create a file that contains both:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 5"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 6"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#assembly=GRCh38"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#matchVariantsBy=allele"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#CHROM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"POS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"REF"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"END"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."
),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"23603511"),(0,l.kt)("td",{parentName:"tr",align:"left"},"TGA"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr16"),(0,l.kt)("td",{parentName:"tr",align:"left"},"68801894"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr19"),(0,l.kt)("td",{parentName:"tr",align:"left"},"11107436"),(0,l.kt)("td",{parentName:"tr",align:"left"},"G"),(0,l.kt)("td",{parentName:"tr",align:"left"},"A"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:
"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval #1")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr21"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10510818"),(0,l.kt)("td",{parentName:"tr",align:"left"},"C"),(0,l.kt)("td",{parentName:"tr",align:"left"},"<","DEL",">"),(0,l.kt)("td",{parentName:"tr",align:"left"},"10699435"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Interval #2")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"chr22"),(0,l.kt)("td",{parentName:"tr",align:"left"},"12370388"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T"),(0,l.kt)("td",{parentName:"tr",align:"left"},"T[chr22:12370729["),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"Known false-positive")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource4.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's new in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 4")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"REF")," field. Exception for the case listed below, this is only used by small variants or translocation breakends."),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 5")," now has the ",(0,l.kt)("inlineCode",{parentName:"li"},"END")," field. This is only used by genomic regions."),(0,l.kt)("li",{parentName:"ul"},"There are two custom annotations on chr21 and the start and end coordinates look the same, so what's different? 
Interval #2 has ",(0,l.kt)("strong",{parentName:"li"},"a symbolic allele in the ALT column"),". When this is used in custom annotation, the start position is treated as the padding base (using VCF conventions). When Illumina Connected Annotations matches a variant to interval #2, it will ignore the padding base and consider the start position to be at position 10510819.")),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations-4"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's use a new VCF file to study how matching works for intervals #1 and #2:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n21 10510818 . C . . END=10699435;SVTYPE=DUP\n22 12370388 . T T[chr22:12370729[ . . SVTYPE=BND\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA3.vcf"},"the full VCF file"),"."),(0,l.kt)("p",null,'The first variant is similar to the custom annotation labelled "interval #2". 
Position 10510818 is the padding base, so it effectively starts at position 10510819.'),(0,l.kt)("h4",{id:"investigate-the-results-4"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-26}","{11-26}":!0},' "positions": [\n {\n "chromosome": "21",\n "position": 10510818,\n "svEnd": 10699435,\n "refAllele": "C",\n "altAlleles": [\n ""\n ],\n "cytogeneticBand": "21p11.2",\n "MyDataSource": [\n {\n "start": 10510818,\n "end": 10699435,\n "notes": "Interval #1",\n "reciprocalOverlap": 0.99999,\n "annotationOverlap": 0.99999\n },\n {\n "start": 10510819,\n "end": 10699435,\n "notes": "Interval #2",\n "reciprocalOverlap": 1,\n "annotationOverlap": 1\n }\n ],\n')),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA4.json.gz"},"the full JSON file"),"."),(0,l.kt)("p",null,"As expected, the variant and interval #2 have matching endpoints, therefore there is 100% overlap. 
Interval #1 technically starts 1 bp earlier, so its overlap 99.9%."),(0,l.kt)("p",null,"Further down the JSON file, we find the annotated translocation breakend:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{11-15}","{11-15}":!0},' "variants": [\n {\n "vid": "22-12370388-T-T[chr22:12370729[",\n "chromosome": "22",\n "begin": 12370388,\n "end": 12370388,\n "isStructuralVariant": true,\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "variantType": "translocation_breakend",\n "MyDataSource": {\n "refAllele": "T",\n "altAllele": "T[chr22:12370729[",\n "notes": "Known false-positive"\n }\n }\n')),(0,l.kt)("h2",{id:"gene-file-format"},"Gene File Format"),(0,l.kt)("h3",{id:"basic-gene-example"},"Basic Gene Example"),(0,l.kt)("h4",{id:"create-the-custom-annotation-tsv-5"},"Create the Custom Annotation TSV"),(0,l.kt)("p",null,"Previously we looked at examples that either had small variants or genomic regions, however, sometimes we would like to add custom gene annotations. 
The gene custom annotation file format\nlooks slightly different:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 1"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 2"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 3"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Col 4"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#title=MyDataSource"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"})),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#geneSymbol"),(0,l.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,l.kt)("td",{parentName:"tr",align:"left"},"phenotype"),(0,l.kt)("td",{parentName:"tr",align:"left"},"notes")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#categories"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#descriptions"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"#type"),(0,l.kt)("td",{parentName:"tr",align:"left"},"."),(0,l.kt)("td",{parentName:"tr",align:"left"},"string"),(0,l.kt)("td",{parentName:"tr",align:"left"},"string")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TP53"),(0,l.kt)("td",{parentName:"tr",align:"left"},"7157"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colorectal cancer, hereditary nonpolyposis, type 
5"),(0,l.kt)("td",{parentName:"tr",align:"left"},".")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KRAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ENSG00000133703"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mismatch repair cancer syndrome"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Seen in cohort 123")))),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/MyDataSource5.tsv"},"the full TSV file"),"."),(0,l.kt)("p",null,"Let's go over what's in this example:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("strong",{parentName:"li"},"Column 2")," has the ",(0,l.kt)("inlineCode",{parentName:"li"},"geneId")," field. This can be either an ",(0,l.kt)("strong",{parentName:"li"},"Entrez Gene ID")," or an ",(0,l.kt)("strong",{parentName:"li"},"Ensembl ID"),".")),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Gene Symbols")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Gene symbols are always in flux and are being updated on a daily basis at the NCBI and at HGNC. Due to this, Illumina Connected Annotations uses the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneId")," to match genes rather than the gene symbol. 
However, to\nmake the custom annotation files easier to read, we've included the ",(0,l.kt)("inlineCode",{parentName:"p"},"geneSymbol")," column as well."))),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Unknown Gene IDs")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"When Illumina Connected Annotations parses the gene custom annotation file, it will note any gene IDs that are currently not recognized in Illumina Connected Annotations. In such a case, Illumina Connected Annotations will display an error showing all the\nunrecognized gene IDs."))),(0,l.kt)("h4",{id:"annotate-with-illumina-connected-annotations-5"},"Annotate with Illumina Connected Annotations"),(0,l.kt)("p",null,"Let's use a VCF file that contain variants in TP53 and KRAS:"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\n12 25227255 . A T . . .\n17 7675074 . C A . . 
.\n")),(0,l.kt)("p",null,"Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA4.vcf"},"the full VCF file"),"."),(0,l.kt)("h4",{id:"investigate-the-results-5"},"Investigate the Results"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json",metastring:"{24-27}","{24-27}":!0},' "genes": [\n {\n "name": "KRAS",\n "clingenGeneValidity": [\n {\n "diseaseId": "MONDO_0009026",\n "disease": "Costello syndrome",\n "classification": "disputed",\n "classificationDate": "2018-07-24"\n }\n ],\n "clingenDosageSensitivityMap": {\n "haploinsufficiency": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "no evidence to suggest that dosage sensitivity is associated with clinical phenotype"\n },\n "gnomAD": {\n "pLi": 0.000788,\n "pRec": 0.789,\n "pNull": 0.21,\n "synZ": 0.336,\n "misZ": 2.32,\n "loeuf": 1.24\n },\n "MyDataSource": {\n "phenotype": "Mismatch repair cancer syndrome",\n "notes": "Seen in cohort 123"\n }\n },\n')),(0,l.kt)("p",null,"This is the abbreviated output for KRAS. Here's ",(0,l.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/TestCA5.json.gz"},"the full JSON file")," if you want to see the complete KRAS entry."),(0,l.kt)("h2",{id:"customizing-the-header"},"Customizing the Header"),(0,l.kt)("h3",{id:"title"},"Title"),(0,l.kt)("p",null,"For the title, you can provide any string that hasn't already been used. 
The title should be unique."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Make sure that the title does not conflict with other keys in the JSON file."))),(0,l.kt)("p",null,"For small variants, you can't provide a title that conflicts with other keys in the variant object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"vid"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"transcripts"),", etc.. The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clinvar")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"gnomad"),"."),(0,l.kt)("p",null,"For structural variants, you can't provide a title that conflicts with other keys in the position object. Some examples of this would be\n",(0,l.kt)("inlineCode",{parentName:"p"},"chromosome"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"svLength"),", ",(0,l.kt)("inlineCode",{parentName:"p"},"cytogeneticBand"),", etc. 
The title should also not conflict with other data source keys like ",(0,l.kt)("inlineCode",{parentName:"p"},"clingen")," or ",(0,l.kt)("inlineCode",{parentName:"p"},"dgv"),"."),(0,l.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Care should be taken not to annotate using multiple custom annotations that all use the same title."))),(0,l.kt)("h3",{id:"genome-assemblies"},"Genome Assemblies"),(0,l.kt)("p",null,"The following genome assemblies can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"GRCh37"),(0,l.kt)("li",{parentName:"ul"},"GRCh38")),(0,l.kt)("h3",{id:"matching-criteria"},"Matching Criteria"),(0,l.kt)("p",null,"The matching criteria instructs how Illumina Connected Annotations should match a VCF variant to the custom annotation."),(0,l.kt)("p",null,"The following matching criteria can be specified:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"allele")," - use this when you only want allele-specific matches. This is commonly the case when using allele frequency data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"gnomAD")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"position")," - use this when you want positional matches. 
This is commonly used with disease phenotype data sources like ",(0,l.kt)("inlineCode",{parentName:"li"},"ClinVar")),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"sv")," - use this when you want to match to all other overlapping SVs. This use case arose when we were adding custom annotations for baseline\ncopy number intervals along the genome.")),(0,l.kt)("h3",{id:"categories"},"Categories"),(0,l.kt)("p",null,"Categories are not used by Illumina Connected Annotations, but are often used by downstream tools. Categories provide hints for how those tools should filter or display\nthe annotation data."),(0,l.kt)("p",null,"When a category is specified, Illumina Connected Annotations will provide additional validation for those fields. The following table describes each category:"),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Category"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Validation"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele counts for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleNumber"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele numbers for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AlleleFrequency"),(0,l.kt)("td",{parentName:"tr",align:"left"},"allele frequencies for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations 
below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Prediction"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ACMG-style pathogenicity classifications"),(0,l.kt)("td",{parentName:"tr",align:"left"},"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"benign")," (B)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely benign")," (LB)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"VUS"),(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"likely pathogenic")," (LP)",(0,l.kt)("br",null),"\u2022 ",(0,l.kt)("inlineCode",{parentName:"td"},"pathogenic")," (P)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free text that signals downstream tools to add the column to the filter"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 20 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"free-text description"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 100 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Identifier"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any ID"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Max 50 characters")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"HomozygousCount"),(0,l.kt)("td",{parentName:"tr",align:"left"},"count of homozygous individuals for a specific population"),(0,l.kt)("td",{parentName:"tr",align:"left"},"See the supported populations below")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"Score"),(0,l.kt)("td",{parentName:"tr",align:"left"},"any score value"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Any double-precision floating point 
number")))),(0,l.kt)("h3",{id:"descriptions"},"Descriptions"),(0,l.kt)("p",null,"Descriptions are used to add more context to the categories. For now, descriptions are mainly used to associate allele counts, numbers, and frequencies with their respective populations."),(0,l.kt)("h4",{id:"populations"},"Populations"),(0,l.kt)("p",null,"The following populations were specified in the HapMap project, 1000 Genomes Project, ExAC, and gnomAD."),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Super-population Code"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Description"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ACB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African Caribbeans in Barbados")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"African")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"ALL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"All populations")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ad Mixed American")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASJ"),(0,l.kt)("td",{parentName:"tr",align:"left"}),(0,l.kt)("td",{parentName:"tr",align:"left"},"Ashkenazi Jewish")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ASW"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Americans of 
African Ancestry in SW USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"BEB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Bengali from Bangladesh")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CDX"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Chinese Dai in Xishuangbanna, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CEU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Utah Residents (CEPH) with Northern and Western European Ancestry")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHB"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Han Chinese in Beijing, China")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CHS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Southern Han Chinese")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"CLM"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Colombians from Medellin, Colombia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"East Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ESN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Esan in 
Nigeria")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"FIN"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Finnish in Finland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GBR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"British in England and Scotland")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GIH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gujarati Indian from Houston, Texas")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"GWD"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Gambian in Western Divisions in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"IBS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Iberian population in Spain")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"ITU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Indian Telugu from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"JPT"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Japanese in Tokyo, Japan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"KHV"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Kinh in Ho Chi Minh City, 
Vietnam")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"LWK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Luhya in Webuye, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MAG"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mandinka in the Gambia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MKK"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Maasai in Kinyawa, Kenya")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MSL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mende in Sierra Leone")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"MXL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Mexican Ancestry from Los Angeles, USA")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"NFE"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"European (Non-Finnish)")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"OTH"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Other")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PEL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Peruvians from Lima, Peru")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PJL"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Punjabi from Lahore, 
Pakistan")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"PUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AMR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Puerto Ricans from Puerto Rico")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"South Asian")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"STU"),(0,l.kt)("td",{parentName:"tr",align:"left"},"SAS"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Sri Lankan Tamil from the UK")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"TSI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"EUR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Toscani in Italia")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"YRI"),(0,l.kt)("td",{parentName:"tr",align:"left"},"AFR"),(0,l.kt)("td",{parentName:"tr",align:"left"},"Yoruba in Ibadan, Nigeria")))),(0,l.kt)("h3",{id:"data-types"},"Data Types"),(0,l.kt)("p",null,"Each custom annotation can be one of the following data types:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"bool")," - true or false"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"number")," - any integer or floating-point number"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"string")," - text")),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 
1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"For boolean variables, only keys with a ",(0,l.kt)("inlineCode",{parentName:"p"},"true")," value will be output to the JSON object."))),(0,l.kt)("h2",{id:"using-sautils"},"Using SAUtils"),(0,l.kt)("p",null,"Illumina Connected Annotations includes a tool called ",(0,l.kt)("inlineCode",{parentName:"p"},"SAUtils")," that converts various data sources into Illumina Connected Annotations's native binary format. The sub-commands ",(0,l.kt)("inlineCode",{parentName:"p"},"customvar")," and ",(0,l.kt)("inlineCode",{parentName:"p"},"customgene")," are used to specify a variant file or a gene file respectively."),(0,l.kt)("h3",{id:"convert-variant-file"},"Convert Variant File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll customvar \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,l.kt)("h3",{id:"convert-gene-file"},"Convert Gene 
File"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/SAUtils.dll customgene \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -c Data/Cache \\\n -i MyDataSource.tsv \\\n -o SupplementaryAnnotation\n")),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the Illumina Connected Annotations cache path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input TSV path"),(0,l.kt)("li",{parentName:"ul"},"the ",(0,l.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e3117044.34d1e2b7.js b/assets/js/e3117044.34d1e2b7.js deleted file mode 100644 index 9a892f10..00000000 --- a/assets/js/e3117044.34d1e2b7.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[217],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>g});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function i(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var s=r.createContext({}),p=function(t){var e=r.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},m=function(t){var e=p(t.components);return r.createElement(s.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return 
r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,o=t.originalType,s=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),c=p(n),u=a,g=c["".concat(s,".").concat(u)]||c[u]||d[u]||o;return n?r.createElement(g,i(i({ref:e},m),{},{components:n})):r.createElement(g,i({ref:e},m))}));function g(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=n.length,i=new Array(o);i[0]=u;var l={};for(var s in e)hasOwnProperty.call(e,s)&&(l[s]=e[s]);l.originalType=t,l[c]="string"==typeof t?t:a,i[1]=l;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/cosmic-json",id:"version-3.17/data-sources/cosmic-json",title:"cosmic-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/cosmic-json.md",sourceDirName:"data-sources",slug:"/data-sources/cosmic-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/cosmic-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/cosmic-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],p={toc:s},m="wrapper";function c(t){let{components:e,...n}=t;return(0,a.kt)(m,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' "cosmicGeneFusions":[\n {\n "id":"COSF881",\n "numSamples":6,\n "geneSymbols":[\n "MYB",\n "NFIB"\n ],\n "hgvsr":"ENST00000341911.5(MYB):r.1_2368::ENST00000397581.2(NFIB):r.2592_3318",\n "histologies":[\n {\n "name":"adenoid cystic carcinoma",\n "numSamples":6\n }\n ],\n "sites":[\n {\n "name":"salivary gland (submandibular)",\n "numSamples":1\n },\n {\n "name":"salivary gland (parotid)",\n "numSamples":1\n },\n {\n "name":"salivary gland (nasal cavity)",\n "numSamples":1\n },\n {\n 
"name":"breast",\n "numSamples":3\n }\n ],\n "pubMedIds":[\n 19841262\n ]\n }\n ]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"id"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"COSMIC fusion ID")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"geneSymbols"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA translocation fusion notation")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"histologies"),(0,a.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"phenotypic descriptions")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"sites"),(0,a.kt)("td",{parentName:"tr",align:"center"},"count array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"tissue types")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"PubMed 
IDs")))),(0,a.kt)("p",null,(0,a.kt)("strong",{parentName:"p"},"Count")),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"name"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"description")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"numSamples"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"})))))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/70df5ffd.1f4ffe0e.js b/assets/js/e39dd739.0325da4d.js similarity index 79% rename from assets/js/70df5ffd.1f4ffe0e.js rename to assets/js/e39dd739.0325da4d.js index ca9acc46..a6dd62ed 100644 --- a/assets/js/70df5ffd.1f4ffe0e.js +++ b/assets/js/e39dd739.0325da4d.js @@ -1 +1 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1100],{3905:(t,e,n)=>{n.d(e,{Zo:()=>m,kt:()=>N});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var p=a.createContext({}),u=function(t){var e=a.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},m=function(t){var 
e=u(t.components);return a.createElement(p.Provider,{value:e},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},k=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(n),k=r,N=d["".concat(p,".").concat(k)]||d[k]||g[k]||l;return n?a.createElement(N,o(o({ref:e},m),{},{components:n})):a.createElement(N,o({ref:e},m))}));function N(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=k;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[d]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var a=n(87462),r=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/gnomad-structural-variants-json",id:"version-3.21/data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad-structural-variants-json.md",tags:[],version:"3.21",frontMatter:{}},p=[],u={toc:p},m="wrapper";function d(t){let{components:e,...n}=t;return(0,r.kt)(m,(0,a.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": [\n {\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n "variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 
0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n "afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n }\n]\n\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"chromosome number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"structural variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"gnomAD 
ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. 
Range: 0 - 1.0")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,r.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}d.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3805],{3905:(t,e,a)=>{a.d(e,{Zo:()=>m,kt:()=>N});var n=a(7294);function 
r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function o(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),u=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):o(o({},e),t)),a},m=function(t){var e=u(t.components);return n.createElement(p.Provider,{value:e},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},k=n.forwardRef((function(t,e){var a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(a),k=r,N=d["".concat(p,".").concat(k)]||d[k]||g[k]||l;return a?n.createElement(N,o(o({ref:e},m),{},{components:a})):n.createElement(N,o({ref:e},m))}));function N(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,o=new Array(l);o[0]=k;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[d]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{a.r(e),a.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var n=a(7462),r=(a(7294),a(3905));const l={},o=void 0,i={unversionedId:"data-sources/gnomad-structural-variants-json",id:"data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes 
|",source:"@site/docs/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],u={toc:p},m="wrapper";function d(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},u,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": [\n {\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n "variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n "afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n }\n]\n\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"chromosome 
number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"structural variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"gnomAD ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,r.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,r.kt)("tr",{parentNa
me:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e39dd739.4e56d11d.js b/assets/js/e39dd739.4e56d11d.js deleted file mode 100644 index d00e0dd8..00000000 --- a/assets/js/e39dd739.4e56d11d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3805],{3905:(t,e,a)=>{a.d(e,{Zo:()=>m,kt:()=>N});var n=a(67294);function r(t,e,a){return e in t?Object.defineProperty(t,e,{value:a,enumerable:!0,configurable:!0,writable:!0}):t[e]=a,t}function l(t,e){var a=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),a.push.apply(a,n)}return a}function o(t){for(var e=1;e=0||(r[a]=t[a]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,a)&&(r[a]=t[a])}return r}var p=n.createContext({}),u=function(t){var e=n.useContext(p),a=e;return t&&(a="function"==typeof t?t(e):o(o({},e),t)),a},m=function(t){var e=u(t.components);return n.createElement(p.Provider,{value:e},t.children)},d="mdxType",g={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},k=n.forwardRef((function(t,e){var 
a=t.components,r=t.mdxType,l=t.originalType,p=t.parentName,m=i(t,["components","mdxType","originalType","parentName"]),d=u(a),k=r,N=d["".concat(p,".").concat(k)]||d[k]||g[k]||l;return a?n.createElement(N,o(o({ref:e},m),{},{components:a})):n.createElement(N,o({ref:e},m))}));function N(t,e){var a=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=a.length,o=new Array(l);o[0]=k;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[d]="string"==typeof t?t:r,o[1]=i;for(var u=2;u{a.r(e),a.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var n=a(87462),r=(a(67294),a(3905));const l={},o=void 0,i={unversionedId:"data-sources/gnomad-structural-variants-json",id:"data-sources/gnomad-structural-variants-json",title:"gnomad-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gnomad-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gnomad-structural-variants-json.md",tags:[],version:"current",frontMatter:{}},p=[],u={toc:p},m="wrapper";function d(t){let{components:e,...a}=t;return(0,r.kt)(m,(0,n.Z)({},u,a,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gnomAD-preview": [\n {\n "chromosome": "1",\n "begin": 40001,\n "end": 47200,\n "variantId": "gnomAD-SV_v2.1_DUP_1_1",\n "variantType": "duplication",\n "failedFilter": true,\n "allAf": 0.068963,\n "afrAf": 0.135694,\n "amrAf": 0.022876,\n "easAf": 0.01101,\n "eurAf": 0.007846,\n "othAf": 0.017544,\n "femaleAf": 0.065288,\n "maleAf": 0.07255,\n "allAc": 943,\n "afrAc": 866,\n "amrAc": 21,\n "easAc": 17,\n "eurAc": 37,\n "othAc": 2,\n "femaleAc": 442,\n "maleAc": 499,\n "allAn": 13674,\n 
"afrAn": 6382,\n "amrAn": 918,\n "easAn": 1544,\n "eurAn": 4716,\n "othAn": 114,\n "femaleAn": 6770,\n "maleAn": 6878,\n "allHc": 91,\n "afrHc": 90,\n "amrHc": 1,\n "easHc": 0,\n "eurHc": 0,\n "othHc": 55,\n "femaleHc": 44,\n "maleHc": 47,\n "reciprocalOverlap": 0.01839,\n "annotationOverlap": 0.16667\n }\n]\n\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"chromosome number")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"begin"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position interval start")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"end"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"position internal end")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantType"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"structural variant type")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"variantId"),(0,r.kt)("td",{parentName:"tr",align:null},"string"),(0,r.kt)("td",{parentName:"tr",align:null},"gnomAD ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all populations. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the African super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the Ad Mixed American super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the East Asian super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for the European super population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for all other populations. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for female population. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"allele frequency for male population. 
Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele count for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the African super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the Ad Mixed American super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the East Asian super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for female population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"allele number for male 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the African / African American population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the Latino population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the East Asian population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for the European super population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for all other populations.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for male population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc"),(0,r.kt)("td",{parentName:"tr",align:null},"integer"),(0,r.kt)("td",{parentName:"tr",align:null},"count of homozygous individuals for female 
population.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,r.kt)("td",{parentName:"tr",align:null},"boolean"),(0,r.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters (Note: we do not list the failed filters)")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,r.kt)("td",{parentName:"tr",align:null},"floating point"),(0,r.kt)("td",{parentName:"tr",align:null},"Reciprocal overlap. Range: 0 - 1.0")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"Note:")," Following fields are not available in ",(0,r.kt)("em",{parentName:"p"},"GRCh38")," because the source file does not contain this information:"),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAf")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleAn")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"allHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"afrHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"amrHc")),(0,r.kt)("tr",{parentNa
me:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"easHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"eurAc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"othHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"maleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"femaleHc")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"failedFilter")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e3c84ce4.7d00023e.js b/assets/js/e3c84ce4.7d00023e.js deleted file mode 100644 index f0e45a76..00000000 --- a/assets/js/e3c84ce4.7d00023e.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1647,8493],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>g});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var o=a.createContext({}),p=function(e){var t=a.useContext(o),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},c=function(e){var t=p(e.components);return a.createElement(o.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var 
n=e.components,i=e.mdxType,r=e.originalType,o=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),d=p(n),u=i,g=d["".concat(o,".").concat(u)]||d[u]||m[u]||r;return n?a.createElement(g,l(l({ref:t},c),{},{components:n})):a.createElement(g,l({ref:t},c))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=u;var s={};for(var o in t)hasOwnProperty.call(t,o)&&(s[o]=t[o]);s.originalType=e,s[d]="string"==typeof e?e:i,l[1]=s;for(var p=2;p{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>s,toc:()=>o});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,s={unversionedId:"data-sources/clinvar-json",id:"version-3.14/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.14/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/clinvar-json.md",tags:[],version:"3.14",frontMatter:{}},o=[],p={toc:o},c="wrapper";function d(e){let{components:t,...n}=e;return(0,i.kt)(c,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "significance":[\n "benign"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "lastUpdatedDate":"2020-03-01",\n "isAlleleSpecific":true\n },\n {\n "id":"RCV000030258.4",\n "variationId":"VCV000036581.3",\n "reviewStatus":"reviewed by expert panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n 
"lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"id"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variationId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"ClinVar VCV ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no assertion provided"),(0,i.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,i.kt)("li",{parentName:"ul"},"practice guideline"),(0,i.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,i.kt)("li",{parentName:"ul"},"criteria provided, 
multiple submitters, no conflicts"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"unknown"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"germline"),(0,i.kt)("li",{parentName:"ul"},"somatic"),(0,i.kt)("li",{parentName:"ul"},"inherited"),(0,i.kt)("li",{parentName:"ul"},"paternal"),(0,i.kt)("li",{parentName:"ul"},"maternal"),(0,i.kt)("li",{parentName:"ul"},"de-novo"),(0,i.kt)("li",{parentName:"ul"},"biparental"),(0,i.kt)("li",{parentName:"ul"},"uniparental"),(0,i.kt)("li",{parentName:"ul"},"not-tested"),(0,i.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance:")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"uncertain significance"),(0,i.kt)("li",{parentName:"ul"},"not provided"),(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"drug response"),(0,i.kt)("li",{parentName:"ul"},"histocompatibility"),(0,i.kt)("li",{parentName:"ul"},"association"),(0,i.kt)("li",{parentName:"ul"},"risk factor"),(0,i.kt)("li",{parentName:"ul"},"protective"),(0,i.kt)("li",{parentName:"ul"},"affects"),(0,i.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,i.kt)("li",{parentName:"ul"},"other"),(0,i.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,i.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}d.isMDXComponent=!0},23344:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>m,frontMatter:()=>l,metadata:()=>o,toc:()=>p});var a=n(87462),i=(n(67294),n(3905)),r=n(5044);const l={title:"ClinVar"},s=void 
0,o={unversionedId:"data-sources/clinvar",id:"version-3.14/data-sources/clinvar",title:"ClinVar",description:"Overview",source:"@site/versioned_docs/version-3.14/data-sources/clinvar.mdx",sourceDirName:"data-sources",slug:"/data-sources/clinvar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/clinvar",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/data-sources/clinvar.mdx",tags:[],version:"3.14",frontMatter:{title:"ClinVar"},sidebar:"version-3.14/docs",previous:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/1000Genomes"},next:{title:"dbSNP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/data-sources/dbsnp"}},p=[{value:"Overview",id:"overview",children:[],level:2},{value:"RCV File",id:"rcv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Parsing Significance",id:"parsing-significance",children:[],level:4}],level:3}],level:2},{value:"VCV File",id:"vcv-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:p},d="wrapper";function m(e){let{components:t,...l}=e;return(0,i.kt)(d,(0,a.Z)({},c,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence. 
ClinVar thus facilitates access to and communication about the relationships asserted between human variation and observed health status, and the history of that interpretation."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Melissa J Landrum, Jennifer M Lee, Mark Benson, Garth R Brown, Chen Chao, Shanmuga Chitipiralla, Baoshan Gu, Jennifer Hart, Douglas Hoffman, Wonhee Jang, Karen Karapetyan, Kenneth Katz, Chunlei Liu, Zenith Maddipatla, Adriana Malheiro, Kurt McDaniel, Michael Ovetsky, George Riley, George Zhou, J Bradley Holmes, Brandi L Kattman, Donna R Maglott, ClinVar: improving access to variant interpretations and supporting evidence, ",(0,i.kt)("em",{parentName:"p"},"Nucleic Acids Research"),", ",(0,i.kt)("strong",{parentName:"p"},"46"),", Issue D1, 4 January 2018, Pages D1062\u2013D1067, ",(0,i.kt)("a",{parentName:"p",href:"https://doi.org/10.1093/nar/gkx1153"},"https://doi.org/10.1093/nar/gkx1153")))),(0,i.kt)("h2",{id:"rcv-file"},"RCV File"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{target:"_blank",href:n(46760).Z},"a full RCV entry"),"."),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON 
output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ID")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3}","{3}":!0},'\n \n \n\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"LastUpdatedDate")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},'\n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{5}","{5}":!0},'\n \n \n no assertion criteria provided \n Pathogenic \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"ReviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},'\n \n \n no assertion criteria provided \n Pathogenic \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Phenotypes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2-8}","{2-8}":!0},'\n \n \n \n Joubert syndrome 9\n \n \n \n\n')),(0,i.kt)("p",null,'We only use the field with Type="Preferred". 
Multiple phenotypes may be reported'),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Location and Variant Id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,5-12}","{3,5-12}":!0},'\n\n \n \n \n \n \n \n \n\n')),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"The variant position is extracted from the fields for their respective assemblies."),(0,i.kt)("li",{parentName:"ul"},"Updated records contain positionVCF, referenceAlleleVCF and alternateAlleleVCF fields and when present, we use them to create the variant."),(0,i.kt)("li",{parentName:"ul"},'For older records, since "start\' and "stop" fields are not always available, we use the "display_start" and "display_end" fields.'),(0,i.kt)("li",{parentName:"ul"},"If a required allele is not available, we extract it from the reference sequence."),(0,i.kt)("li",{parentName:"ul"},"Only variants having a dbSNP id are extracted."),(0,i.kt)("li",{parentName:"ul"},"Note that a ClinVar accession may have multiple variants associated with it (possible in different locations)"),(0,i.kt)("li",{parentName:"ul"},"VariantId is extracted from the MeasureSet attributes.")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"MedGen, OMIM, Orphanet IDs")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4-7}","{4-7}":!0},'\n \n \n \n \n \n \n \n \n\n')),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"AlleleOrigins")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{2}","{2}":!0},"\n germline\n\n")),(0,i.kt)("p",null,"We only extract all Allele Origins from Submissions (SCV) entries."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"PubMedIds")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4,10,16,21}","{4,10,16,21}":!0},'\n \n \n 12114475\n \n \n \n LMM Criteria\n \n 24033266\n \n \n \n \n \n 9113933\n \n \n \n \n 23757202\n 
\n\n')),(0,i.kt)("p",null,"We only extract all Pubmed Ids from Submissions (SCV) entries."),(0,i.kt)("h4",{id:"parsing-significance"},"Parsing Significance"),(0,i.kt)("p",null,"Extracting significance(s) may involve parsing multiple fields. Take the following snippets into consideration."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{3,8,13-14}","{3,8,13-14}":!0},'\n no assertion criteria provided\n Pathogenic\n\n\n\n criteria provided, multiple submitters, no conflicts\n Pathogenic/Likely pathogenic\n\n\n\n no assertion criteria provided\n Conflicting interpretations of pathogenicity\n Pathogenic(1);Uncertain significance(1)\n\n')),(0,i.kt)("p",null,"Given the evidence, we converted the significance field into an array of strings which may be parsed out of the ",(0,i.kt)("inlineCode",{parentName:"p"},"Descriptions")," or ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," fields. "),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Varying Delimiters")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The delimiters in each field may vary. Currently, the delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Description")," are ",(0,i.kt)("inlineCode",{parentName:"p"},",")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),". 
The delimiters for ",(0,i.kt)("inlineCode",{parentName:"p"},"Explanation")," are ",(0,i.kt)("inlineCode",{parentName:"p"},";")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"/"),"."))),(0,i.kt)("h2",{id:"vcv-file"},"VCV File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n\n\n current\n Homo sapiens\n \n \n \n \n \n 1p36.31\n \n \n \n 601142\n \n \n \n 1p36.31\n \n \n \n 607215\n \n \n GRCh37/hg19 1p36.31(chr1:6051187-6158763)\n copy number gain\n \n 1p36.31\n \n \n \n no interpretation for the single variant\n \n \n \n \n \n \n no interpretation for the single variant\n \n \n no interpretation for the single variant\n \n \n \n \n \n \n \n \n \n\n\n')),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"In the following section, we discuss which field of the XML was used to extract information that is presented in the JSON output."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"id")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml"},'\n')),(0,i.kt)("p",null,"The Acc and Version fields are merged to form the ID (RCV000000001.2)"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"significance")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{7}","{7}":!0},'\n \n \n \n \n \n no interpretation for the single variant\n \n \n \n \n \n\n')),(0,i.kt)("p",null,"May have multiple significances listed."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"reviewStatus")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-xml",metastring:"{4}","{4}":!0},"\n \n \n no interpretation for the single variant\n \n \n\n")),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"The XML file contains ~1k more entries (out of 162K) than the VCF file"),(0,i.kt)("li",{parentName:"ul"},"The XML file does not have a field indicating that a record is associated with the reference base - something that was present in VCF"),(0,i.kt)("li",{parentName:"ul"},'The XML file contains entries (e.g. RCV000016645 version=1) which have IUPAC ambiguous bases ("R", "Y", "H",\netc.) 
as their alternate allele')))),(0,i.kt)("h2",{id:"download-url"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz"},"ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz")),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"JSON"}))}m.isMDXComponent=!0},46760:(e,t,n)=>{n.d(t,{Z:()=>a});const a=n.p+"assets/files/clinvar-rcv-example-4e0a2f2ac6c70acd0ce41410690b683b.xml"}}]); \ No newline at end of file diff --git a/assets/js/e3db97ce.412e8590.js b/assets/js/e3db97ce.412e8590.js deleted file mode 100644 index 016e4f15..00000000 --- a/assets/js/e3db97ce.412e8590.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5146],{3905:(t,e,n)=>{n.d(e,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function i(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var l=r.createContext({}),p=function(t){var e=r.useContext(l),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},s=function(t){var e=p(t.components);return r.createElement(l.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var 
n=t.components,a=t.mdxType,o=t.originalType,l=t.parentName,s=c(t,["components","mdxType","originalType","parentName"]),d=p(n),u=a,f=d["".concat(l,".").concat(u)]||d[u]||m[u]||o;return n?r.createElement(f,i(i({ref:e},s),{},{components:n})):r.createElement(f,i({ref:e},s))}));function f(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=n.length,i=new Array(o);i[0]=u;var c={};for(var l in e)hasOwnProperty.call(e,l)&&(c[l]=e[l]);c.originalType=t,c[d]="string"==typeof t?t:a,i[1]=c;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/splice-ai-json",id:"version-3.17/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/splice-ai-json.md",tags:[],version:"3.17",frontMatter:{}},l=[],p={toc:l},s="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n 
}\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e445e249.df089122.js b/assets/js/e445e249.df089122.js deleted file mode 100644 index 333534fe..00000000 --- a/assets/js/e445e249.df089122.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3130],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>g});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),u=function(t){var e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=u(t.components);return r.createElement(p.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},s=r.forwardRef((function(t,e){var 
n=t.components,a=t.mdxType,l=t.originalType,p=t.parentName,c=i(t,["components","mdxType","originalType","parentName"]),d=u(n),s=a,g=d["".concat(p,".").concat(s)]||d[s]||m[s]||l;return n?r.createElement(g,o(o({ref:e},c),{},{components:n})):r.createElement(g,o({ref:e},c))}));function g(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var l=n.length,o=new Array(l);o[0]=s;var i={};for(var p in e)hasOwnProperty.call(e,p)&&(i[p]=e[p]);i.originalType=t,i[d]="string"==typeof t?t:a,o[1]=i;for(var u=2;u{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>d,frontMatter:()=>l,metadata:()=>i,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/decipher-json",id:"version-3.18/data-sources/decipher-json",title:"decipher-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/decipher-json.md",sourceDirName:"data-sources",slug:"/data-sources/decipher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/decipher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/decipher-json.md",tags:[],version:"3.18",frontMatter:{}},p=[],u={toc:p},c="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},u,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"decipher":[\n {\n "chromosome":"1",\n "begin":13516,\n "end":91073,\n "numDeletions":27,\n "deletionFrequency":0.675,\n "numDuplications":27,\n "duplicationFrequency":0.675,\n "sampleSize":40,\n "reciprocalOverlap": 0.27555,\n "annotationOverlap": 0.5901\n 
}\n],\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"begin"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"end"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"numDeletions"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"# of observed deletions")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"deletionFrequency"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"deletion frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"numDuplications"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"# of observed duplications")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"duplicationFrequency"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"duplication frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"sampleSize"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"total # of 
samples")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% annotation overlap")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e5950a77.7bd8d075.js b/assets/js/e5950a77.7bd8d075.js deleted file mode 100644 index f71bf8e5..00000000 --- a/assets/js/e5950a77.7bd8d075.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4420],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>g});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var n=c(e.components);return a.createElement(l.Provider,{value:n},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},u=a.forwardRef((function(e,n){var 
t=e.components,i=e.mdxType,r=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),d=c(t),u=i,g=d["".concat(l,".").concat(u)]||d[u]||m[u]||r;return t?a.createElement(g,o(o({ref:n},p),{},{components:t})):a.createElement(g,o({ref:n},p))}));function g(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,o=new Array(r);o[0]=u;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[d]="string"==typeof e?e:i,o[1]=s;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>s,toc:()=>l});var a=t(87462),i=(t(67294),t(3905));const r={title:"Gene Fusion Detection"},o=void 0,s={unversionedId:"core-functionality/gene-fusions",id:"version-3.18/core-functionality/gene-fusions",title:"Gene Fusion Detection",description:"Overview",source:"@site/versioned_docs/version-3.18/core-functionality/gene-fusions.md",sourceDirName:"core-functionality",slug:"/core-functionality/gene-fusions",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/gene-fusions",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/core-functionality/gene-fusions.md",tags:[],version:"3.18",frontMatter:{title:"Gene Fusion Detection"},sidebar:"docs",previous:{title:"Canonical Transcripts",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/canonical-transcripts"},next:{title:"MNV Recomposition",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/core-functionality/mnv-recomposition"}},l=[{value:"Overview",id:"overview",children:[],level:2},{value:"Approach",id:"approach",children:[{value:"Variant Types",id:"variant-types",children:[],level:3},{value:"Criteria",id:"criteria",children:[],level:3}],level:2},{value:"ETV6/RUNX1 Example",id:"etv6runx1-example",children:[{value:"VCF",id:"vcf",children:[],level:3},{value:"JSON Output",id:"json-output",children:[{value:"Gene Fusion Data 
Sources",id:"gene-fusion-data-sources",children:[],level:4},{value:"Consequences",id:"consequences",children:[],level:4},{value:"Gene Fusions Section",id:"gene-fusions-section",children:[],level:4}],level:3}],level:2}],c={toc:l},p="wrapper";function d(e){let{components:n,...r}=e;return(0,i.kt)(p,(0,a.Z)({},c,r,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed."),(0,i.kt)("p",null,"Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Nirvana."),(0,i.kt)("p",null,"The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(66347).Z})),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. 
",(0,i.kt)("a",{parentName:"p",href:"https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-015-0252-1"},"Landscape of gene fusions in epithelial cancers: seq and ye shall find"),". Genome Med 7, 129 (2015)"))),(0,i.kt)("h2",{id:"approach"},"Approach"),(0,i.kt)("p",null,"Nirvana uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. Let's consider two transcripts, ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_014206.3")," (",(0,i.kt)("strong",{parentName:"p"},"TMEM258"),") and ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_013402.4")," (",(0,i.kt)("strong",{parentName:"p"},"FADS1"),"). Both of these genes are on the reverse strand in the genome. The vertical bar indicates the breakpoint where these transcripts are fused:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 transcripts",src:t(76594).Z})),(0,i.kt)("p",null,"The above explains where the transcripts are fused together, but it doesn't explain in which orientation. By using the directionality encoded in the translocation breakend, we can rearrange these two transcripts in four ways:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 gene fusions",src:t(14240).Z})),(0,i.kt)("p",null,"Only two of the combinations yields a fusion contains both the transcription start site (TSS) and the stop codon. 
In one case, we can even detect an in-frame gene fusion."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Interpreting translocation breakends")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"At first glance, translocation breakends are a bit daunting. However, once you understand how they work, they're actually quite simple. For more information, we recommend reading section 5.4 in the ",(0,i.kt)("a",{parentName:"p",href:"https://samtools.github.io/hts-specs/VCFv4.2.pdf"},"VCF 4.2 specification"),"."),(0,i.kt)("table",{parentName:"div"},(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"REF"),(0,i.kt)("th",{parentName:"tr",align:"left"},"ALT"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Meaning"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t[p["),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the right of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t]p]"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending left of p is joined after 
t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"]p]t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the left of p is joined before t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"[p[t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending right of p is joined before t")))))),(0,i.kt)("h3",{id:"variant-types"},"Variant Types"),(0,i.kt)("p",null,"Specifically we can identify gene fusions from the following structural variant types:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"deletions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"tandem_duplications (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"inversions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"translocation breakpoints (",(0,i.kt)("inlineCode",{parentName:"li"},"AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911["),") ")),(0,i.kt)("h3",{id:"criteria"},"Criteria"),(0,i.kt)("p",null,"The following criteria must be met for Nirvana to identify a gene fusion:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"After accounting for gene orientation and genomic rearrangements, both transcripts must have the same orientation"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must belong to different genes"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts cannot have a coding region that already overlaps without the variant (i.e. 
in cases where two genes naturally overlap, we don't want to call a gene fusion)")),(0,i.kt)("h2",{id:"etv6runx1-example"},"ETV6/RUNX1 Example"),(0,i.kt)("p",null,"ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). Patients with this translocation are associated with a good prognosis and excellent response to treatment."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sun C., Chang L., Zhu X. ",(0,i.kt)("a",{parentName:"p",href:"https://www.oncotarget.com/article/16367/text/"},"Pathogenesis of ETV6/RUNX1-positive childhood acute lymphoblastic leukemia and mechanisms underlying its relapse"),". Oncotarget. 2017; 8: 35445-35459"))),(0,i.kt)("h3",{id:"vcf"},"VCF"),(0,i.kt)("p",null,"Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\nchr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND\nchr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND\nchr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND\nchr21 36420865 . C [chr12:12026270[C . 
PASS SVTYPE=BND\n")),(0,i.kt)("p",null,"When you put these calls together, the resulting genomic rearrangement looks something like this:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(12914).Z})),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)("p",null,"The annotation for the first variant in the VCF looks like this:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{139,141-198,211,213-222}","{139,141-198,211,213-222}":!0},'{\n "chromosome": "chr12",\n "position": 12026270,\n "refAllele": "C",\n "altAlleles": [\n "[chr21:36420865[C"\n ],\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "12p13.2",\n "clingen": [\n {\n "chromosome": "12",\n "begin": 173786,\n "end": 34835837,\n "variantType": "copy_number_gain",\n "id": "nsv995956",\n "clinicalInterpretation": "pathogenic",\n "phenotypes": [\n "Decreased calvarial ossification",\n "Delayed gross motor development",\n "Feeding difficulties",\n "Frontal bossing",\n "Morphological abnormality of the central nervous system",\n "Patchy alopecia"\n ],\n "phenotypeIds": [\n "HP:0002007",\n "HP:0002011",\n "HP:0002194",\n "HP:0002232",\n "HP:0005474",\n "HP:0011968",\n "MedGen:C0232466",\n "MedGen:C1862862",\n "MedGen:CN001816",\n "MedGen:CN001820",\n "MedGen:CN001989",\n "MedGen:CN004852"\n ],\n "observedGains": 1,\n "validated": true\n }\n ],\n "variants": [\n {\n "vid": "12-12026270-C-[chr21:36420865[C",\n "chromosome": "chr12",\n "begin": 12026270,\n "end": 12026270,\n "isStructuralVariant": true,\n "refAllele": "C",\n "altAllele": "[chr21:36420865[C",\n "variantType": "translocation_breakend",\n "cosmicGeneFusions": [\n {\n "id": "COSF2245",\n "numSamples": 249,\n "geneSymbols": [\n "ETV6",\n "RUNX1"\n ],\n "hgvsr": "ENST00000396373.4(ETV6):r.1_1283::ENST00000300305.3(RUNX1):r.504_6222",\n "histologies": [\n {\n "name": "acute lymphoblastic B cell leukaemia",\n "numSamples": 169\n },\n {\n "name": "acute lymphoblastic leukaemia",\n "numSamples": 80\n }\n ],\n "sites": 
[\n {\n "name": "haematopoietic and lymphoid tissue",\n "numSamples": 249\n }\n ],\n "pubMedIds": [\n 7761424,\n 7780150,\n 8609706,\n 8751464,\n 8982044,\n 9067587,\n 9207408,\n 9226156,\n 9628428,\n 10463610,\n 10774753,\n 11091202,\n 12621238,\n 12661004,\n 12750722,\n 15104290,\n 15642392,\n 24557455,\n 26925663\n ]\n }\n ],\n "fusionCatcher": [\n {\n "genes": {\n "first": {\n "hgnc": "ETV6",\n "isOncogene": true\n },\n "second": {\n "hgnc": "RUNX1",\n "isOncogene": true\n }\n },\n "somaticSources": [\n "DepMap CCLE",\n "Cancer Genome Project",\n "ChimerKB 4.0",\n "ChimerPub 4.0",\n "ChimerSeq 4.0",\n "Known",\n "Mitelman DB",\n "OncoKB",\n "TICdb"\n ]\n }\n ],\n "transcripts": [\n {\n "transcript": "ENST00000396373.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "ENSG00000139083",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "ENST00000437180.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000437180.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000300305.3",\n "bioType": "protein_coding",\n "intron": 1,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000300305.3(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000482318.1",\n "bioType": "nonsense_mediated_decay",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000482318.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000486278.2",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000486278.2(RUNX1):r.?_-15+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000455571.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": 
"ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000455571.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000475045.2",\n "bioType": "protein_coding",\n "intron": 11,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000475045.2(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n },\n {\n "transcript": "ENST00000416754.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000416754.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?"\n }\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000379658.3"\n },\n {\n "transcript": "NM_001987.4",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "2120",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"\n }\n ],\n "isCanonical": true,\n "proteinId": "NP_001978.1"\n }\n ]\n }\n ]\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"descriptions of the 
",(0,i.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"exon that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"intron that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. 
MSH6")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA nomenclature")))),(0,i.kt)("h4",{id:"gene-fusion-data-sources"},"Gene Fusion Data Sources"),(0,i.kt)("p",null,"To provide more context to our gene fusions, we provide the following gene fusion data sources:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/cosmic"},"COSMIC")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/fusioncatcher"},"FusionCatcher"))),(0,i.kt)("h4",{id:"consequences"},"Consequences"),(0,i.kt)("p",null,"When a gene fusion is identified, we add the following Sequence Ontology consequence:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{3}","{3}":!0},' "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n')),(0,i.kt)("h4",{id:"gene-fusions-section"},"Gene Fusions Section"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"geneFusions")," section is contained within the object of the originating transcript. It will contain all the pairwise gene fusions that obey the criteria outline above. In the case of ",(0,i.kt)("inlineCode",{parentName:"p"},"ENST00000396373.4"),", there 7 other Ensembl transcripts that would produce a gene fusion. For ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4"),", there was only one transcript (",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4"),") that produce a gene fusion."),(0,i.kt)("p",null,"For each originating transcript, we report the following for each partner transcript:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"transcript ID"),(0,i.kt)("li",{parentName:"ul"},"gene ID"),(0,i.kt)("li",{parentName:"ul"},"HGNC gene symbol"),(0,i.kt)("li",{parentName:"ul"},"transcript bio type (e.g. 
protein_coding)"),(0,i.kt)("li",{parentName:"ul"},"intron or exon number containing the breakpoint"),(0,i.kt)("li",{parentName:"ul"},"HGVS RNA notation")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Before Nirvana 3.15, we provided HGVS coding notation. However, HGVS r. notation is more appropriate for these types fusion splicing events (see ",(0,i.kt)("a",{parentName:"p",href:"https://varnomen.hgvs.org/bg-material/consultation/svd-wg007"},"HGVS SVD-WG007"),")."))),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{8}","{8}":!0},' "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?"\n }\n ],\n')),(0,i.kt)("p",null,"The HGVS RNA notation above indicates that the gene fusion starts with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4")," (RUNX1) until CDS position 58 and continues with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4")," (ETV6). 
",(0,i.kt)("inlineCode",{parentName:"p"},"1009+3367")," indicates that the fusion occurred 3367 bp within intron 2."))}d.isMDXComponent=!0},14240:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_GeneFusions-e5e3758ea9d2c07d3591e3801b2bf7e3.svg"},76594:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_Transcripts-fe1b9c6be1f7cbfefbce887f8cec5d58.svg"},12914:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/etv6-runx1-fusion-ec8f4312c9aca496bde0d6e2b1bbd50d.svg"},66347:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/gene-fusions-fig2-1cce8ac31b00465c8d36bdc47ec3309e.svg"}}]); \ No newline at end of file diff --git a/assets/js/e6117510.f962069f.js b/assets/js/e6117510.f962069f.js deleted file mode 100644 index 4904b713..00000000 --- a/assets/js/e6117510.f962069f.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6077],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>k});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),d=function(t){var e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=d(t.components);return r.createElement(p.Provider,{value:e},t.children)},m="mdxType",s={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var 
n=t.components,a=t.mdxType,i=t.originalType,p=t.parentName,c=l(t,["components","mdxType","originalType","parentName"]),m=d(n),u=a,k=m["".concat(p,".").concat(u)]||m[u]||s[u]||i;return n?r.createElement(k,o(o({ref:e},c),{},{components:n})):r.createElement(k,o({ref:e},c))}));function k(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[m]="string"==typeof t?t:a,o[1]=l;for(var d=2;d{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const i={title:"Dependencies"},o=void 0,l={unversionedId:"introduction/dependencies",id:"version-3.21/introduction/dependencies",title:"Dependencies",description:"All of the following dependencies have been included in this repository.",source:"@site/versioned_docs/version-3.21/introduction/dependencies.md",sourceDirName:"introduction",slug:"/introduction/dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/dependencies",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/introduction/dependencies.md",tags:[],version:"3.21",frontMatter:{title:"Dependencies"},sidebar:"docs",previous:{title:"Introduction",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/"},next:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/introduction/getting-started"}},p=[],d={toc:p},c="wrapper";function m(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},d,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("p",null,"All of the following dependencies have been included in this 
repository."),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Name"),(0,a.kt)("th",{parentName:"tr",align:"center"},"License"),(0,a.kt)("th",{parentName:"tr",align:null},"Usage"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-extensions-for-dotnet-cli"},"Amazon.Lambda")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS extensions for .NET CLI")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-sdk-net/"},"AWSSDK")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS Lambda, S3, SNS support")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://www.newtonsoft.com/json"},"Json.NET")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"JASIX utility")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/ebiggers/libdeflate"},"libdeflate")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/moq/moq4"},"Moq")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"Mocking framework for unit 
tests")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"http://www.ndesk.org/Options"},"NDesk.Options")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT/X11"),(0,a.kt)("td",{parentName:"tr",align:null},"CommandLine library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/xunit/xunit"},"xUnit")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"Unit testing framework")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/Dead2/zlib-ng"},"zlib-ng")),(0,a.kt)("td",{parentName:"tr",align:"center"},"zlib"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/facebook/zstd"},"zstd")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e7c452c5.2d736200.js b/assets/js/e7c452c5.2d736200.js deleted file mode 100644 index 9ff85db5..00000000 --- a/assets/js/e7c452c5.2d736200.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4005],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>m});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function l(e){for(var t=1;t=0||(i[n]=e[n]);return 
i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=c(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},g=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),d=c(n),g=i,m=d["".concat(s,".").concat(g)]||d[g]||u[g]||r;return n?a.createElement(m,l(l({ref:t},p),{},{components:n})):a.createElement(m,l({ref:t},p))}));function m(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,l=new Array(r);l[0]=g;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[d]="string"==typeof e?e:i,l[1]=o;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={},l=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.16/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/clingen-dosage-json.md",tags:[],version:"3.16",frontMatter:{}},s=[],c={toc:s},p="wrapper";function 
d(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e7f9f89d.1590a0f2.js b/assets/js/e7f9f89d.1590a0f2.js deleted file mode 100644 index 6236c1a9..00000000 --- a/assets/js/e7f9f89d.1590a0f2.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7411],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>h});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},d=function(e){var t=c(e.components);return 
a.createElement(s.Provider,{value:t},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),u=c(n),m=i,h=u["".concat(s,".").concat(m)]||u[m]||p[m]||r;return n?a.createElement(h,o(o({ref:t},d),{},{components:n})):a.createElement(h,o({ref:t},d))}));function h(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:i,o[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={title:"Getting Started"},o=void 0,l={unversionedId:"introduction/getting-started",id:"version-3.2.5/introduction/getting-started",title:"Getting Started",description:"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.",source:"@site/versioned_docs/version-3.2.5/introduction/getting-started.md",sourceDirName:"introduction",slug:"/introduction/getting-started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/introduction/getting-started",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/introduction/getting-started.md",tags:[],version:"3.2.5",frontMatter:{title:"Getting Started"},sidebar:"version-3.2.5/docs",previous:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/introduction/dependencies"},next:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/1000Genomes"}},s=[{value:"Quick Start",id:"quick-start",children:[],level:2},{value:"Getting Nirvana",id:"getting-nirvana",children:[{value:"Compile from Source",id:"compile-from-source",children:[],level:3},{value:"GitHub Release Notes",id:"github-release-notes",children:[],level:3}],level:2},{value:"Downloading the data files",id:"downloading-the-data-files",children:[],level:2},{value:"Download a test VCF file",id:"download-a-test-vcf-file",children:[],level:2},{value:"Running Nirvana",id:"running-nirvana",children:[],level:2}],c={toc:s},d="wrapper";function u(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"Nirvana is written in C# using ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core")," (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files."),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana currently uses .NET Core 2.1. 
Please make sure that you have the most current runtime from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core downloads")," page."))),(0,i.kt)("h2",{id:"quick-start"},"Quick Start"),(0,i.kt)("p",null,"If you want to get started right away, we've created ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh"},"a script")," that downloads Nirvana, compiles it, and starts annotating a test file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh\nsh ./TestNirvana.sh\n")),(0,i.kt)("p",null,"We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X."),(0,i.kt)("h2",{id:"getting-nirvana"},"Getting Nirvana"),(0,i.kt)("h3",{id:"compile-from-source"},"Compile from Source"),(0,i.kt)("p",null,"The following will grab the latest version of Nirvana from GitHub and compile it using the .NET Core compiler:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"git clone https://github.com/Illumina/Nirvana.git\ncd Nirvana\ndotnet build -c Release\n")),(0,i.kt)("h3",{id:"github-release-notes"},"GitHub Release Notes"),(0,i.kt)("p",null,"Alternatively, you can grab the latest binaries from our ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/releases/tag/v3.2.5"},"GitHub Releases")," page:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p Nirvana/Data\ncd Nirvana\nunzip Nirvana-3.2.5-dotnet-2.1.0.zip\n")),(0,i.kt)("h2",{id:"downloading-the-data-files"},"Downloading the data files"),(0,i.kt)("div",{className:"admonition admonition-warning alert 
alert--danger"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M5.05.31c.81 2.17.41 3.38-.52 4.31C3.55 5.67 1.98 6.45.9 7.98c-1.45 2.05-1.7 6.53 3.53 7.7-2.2-1.16-2.67-4.52-.3-6.61-.61 2.03.53 3.33 1.94 2.86 1.39-.47 2.3.53 2.27 1.67-.02.78-.31 1.44-1.13 1.81 3.42-.59 4.78-3.42 4.78-5.56 0-2.84-2.53-3.22-1.25-5.61-1.52.13-2.03 1.13-1.89 2.75.09 1.08-1.02 1.8-1.86 1.33-.67-.41-.66-1.19-.06-1.78C8.18 5.31 8.68 2.45 5.05.32L5.03.3l.02.01z"}))),"Downloader not available")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana 3.2.5 does not include a downloader tool, but these files can be copied over from the TSO 500 or TSO Comprehensive data directory if you have those. Otherwise, an unsupported route is to use the downloader from ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/releases/tag/v3.13.0"},"Nirvana 3.13")," to get the reference, cache, and supplementary annotation files. 
"))),(0,i.kt)("h2",{id:"download-a-test-vcf-file"},"Download a test VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz\n")),(0,i.kt)("h2",{id:"running-nirvana"},"Running Nirvana"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/Nirvana.dll \\\n -c Data/Cache/GRCh37/Both \\\n --sd Data/SupplementaryAnnotation/GRCh37 \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i HiSeq.10000.vcf.gz \\\n -o HiSeq.10000\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache prefix"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nNirvana (c) 2020 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 
3.12.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.8\nSA Position Scan 00:00:00.7 12902\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr1 00:00:02.3 00:00:04.5 2176\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:02.6 16.5 %\nPreload 00:00:02.3 15.2 %\nAnnotation 00:00:04.5 29.0 %\n\nTime: 00:00:14.7\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"HiSeq.10000.json.gz"),". Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.json.gz"},"the full JSON file"),"."))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e8a99743.64710208.js b/assets/js/e8a99743.64710208.js deleted file mode 100644 index 1e701bfa..00000000 --- a/assets/js/e8a99743.64710208.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2620,6766],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>v});var a=n(67294);function l(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(l[n]=e[n]);return l}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(l[n]=e[n])}return l}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof 
e?e(t):i(i({},t),e)),n},c=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},m="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,l=e.mdxType,r=e.originalType,s=e.parentName,c=o(e,["components","mdxType","originalType","parentName"]),m=d(n),u=l,v=m["".concat(s,".").concat(u)]||m[u]||p[u]||r;return n?a.createElement(v,i(i({ref:t},c),{},{components:n})):a.createElement(v,i({ref:t},c))}));function v(e,t){var n=arguments,l=t&&t.mdxType;if("string"==typeof e||l){var r=n.length,i=new Array(r);i[0]=u;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[m]="string"==typeof e?e:l,i[1]=o;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>m,frontMatter:()=>r,metadata:()=>o,toc:()=>s});var a=n(87462),l=(n(67294),n(3905));const r={},i=void 0,o={unversionedId:"data-sources/dbsnp-json",id:"version-3.17/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/dbsnp-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],d={toc:s},c="wrapper";function m(e){let{components:t,...n}=e;return(0,l.kt)(c,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n 
"rs1042821"\n]\n')),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,l.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,l.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,l.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,l.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}m.isMDXComponent=!0},73169:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>d});var a=n(87462),l=(n(67294),n(3905)),r=n(22384);const i={title:"dbSNP"},o=void 0,s={unversionedId:"data-sources/dbsnp",id:"version-3.17/data-sources/dbsnp",title:"dbSNP",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/dbsnp.mdx",sourceDirName:"data-sources",slug:"/data-sources/dbsnp",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/dbsnp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/dbsnp.mdx",tags:[],version:"3.17",frontMatter:{title:"dbSNP"},sidebar:"version-3.17/docs",previous:{title:"COSMIC",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/cosmic"},next:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/fusioncatcher"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"VCF File",id:"vcf-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Global allele extraction",id:"global-allele-extraction",children:[],level:4},{value:"Equal Allele Frequency Example (2 alleles)",id:"equal-allele-frequency-example-2-alleles",children:[],level:4},{value:"Equal Allele Frequency Example (3 
alleles)",id:"equal-allele-frequency-example-3-alleles",children:[],level:4},{value:"Equal Allele Frequency in Alternate Alleles",id:"equal-allele-frequency-in-alternate-alleles",children:[],level:4},{value:"Equal Allele Frequency Between Reference & Alternate Allele",id:"equal-allele-frequency-between-reference--alternate-allele",children:[],level:4}],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:d},m="wrapper";function p(e){let{components:t,...n}=e;return(0,l.kt)(m,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,l.kt)("h2",{id:"overview"},"Overview"),(0,l.kt)("p",null,"dbSNP contains human single nucleotide variations, microsatellites, and small-scale insertions and deletions along with publication, population frequency, molecular consequence, and genomic and RefSeq mapping information for both common variations and clinical mutations."),(0,l.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"Sherry, S.T., Ward, M. and Sirotkin, K. (1999) dbSNP\u2014Database for Single Nucleotide Polymorphisms and Other Classes of Minor Genetic Variation. 
",(0,l.kt)("em",{parentName:"p"},"Genome Res."),", ",(0,l.kt)("strong",{parentName:"p"},"9"),", 677\u2013679."))),(0,l.kt)("h2",{id:"vcf-file"},"VCF File"),(0,l.kt)("h3",{id:"example"},"Example"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"#CHROM POS ID REF ALT QUAL FILTER INFO\n1 10177 rs367896724 A AC . . RS=367896724;RSPOS=10177;dbSNPBuildID=138; \\ \n SSR=0;SAO=0;VP=0x050000020005130026000200;GENEINFO=DDX11L1:100287102;WGT=1; \\\n VC=DIV;R5;ASP;G5A;G5;KGPhase3;CAF=0.5747,0.4253;COMMON=1; \\\n TOPMED=0.76728147298674821,0.23271852701325178\n")),(0,l.kt)("h3",{id:"parsing"},"Parsing"),(0,l.kt)("p",null,"From the VCF file, we're mainly interested in the following:"),(0,l.kt)("ul",null,(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"rsID")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"ID")," field"),(0,l.kt)("li",{parentName:"ul"},(0,l.kt)("inlineCode",{parentName:"li"},"CAF")," from the ",(0,l.kt)("inlineCode",{parentName:"li"},"INFO")," field")),(0,l.kt)("h4",{id:"global-allele-extraction"},"Global allele extraction"),(0,l.kt)("p",null,"The global major and minor alleles are extracted based on the frequency of the alleles provided in the CAF field. The global minor allele frequency is the second highest value of the CAF comma delimited field (ignoring '.' values). 
"),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: Global Major Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global major and the reference allele is one of them, we prefer the reference allele."))),(0,l.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"Tie Breaking: 
Global Minor Allele")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are two candidates for global minor and the reference allele is one of them, we prefer the other allele. If the reference allele is not involved, they are chosen arbitrarily."))),(0,l.kt)("h4",{id:"equal-allele-frequency-example-2-alleles"},"Equal Allele Frequency Example (2 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C CAF=0.5,0.5\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and C to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-example-3-alleles"},"Equal Allele Frequency Example (3 alleles)"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.33,0.33,0.33\n")),(0,l.kt)("p",null,"We will select A to be the global major allele and either C or T is chosen (arbitrarily) to be the global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-in-alternate-alleles"},"Equal Allele Frequency in Alternate Alleles"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.4,0.4\n")),(0,l.kt)("p",null,"We will select C or T to be arbitrarily assigned to be the global major or global minor allele."),(0,l.kt)("h4",{id:"equal-allele-frequency-between-reference--alternate-allele"},"Equal Allele Frequency Between Reference & Alternate Allele"),(0,l.kt)("pre",null,(0,l.kt)("code",{parentName:"pre",className:"language-scss"},"chr1 100 A C,T CAF=0.2,0.2,0.6\n")),(0,l.kt)("p",null,"We will select T to be the global major allele and C to be the global minor allele."),(0,l.kt)("h2",{id:"known-issues"},"Known Issues"),(0,l.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,l.kt)("div",{parentName:"div",className:"admonition-heading"},(0,l.kt)("h5",{parentName:"div"},(0,l.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,l.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,l.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,l.kt)("div",{parentName:"div",className:"admonition-content"},(0,l.kt)("p",{parentName:"div"},"If there are multiple entries with different CAF values for the same allele, we use the first CAF value."))),(0,l.kt)("h2",{id:"download-url"},"Download URL"),(0,l.kt)("p",null,(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nih.gov/snp/organisms/"},"https://ftp.ncbi.nih.gov/snp/organisms/")),(0,l.kt)("h2",{id:"json-output"},"JSON Output"),(0,l.kt)(r.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e95cadfe.39c91cf0.js b/assets/js/e95cadfe.39c91cf0.js new file mode 100644 index 00000000..caae7ea9 --- /dev/null +++ b/assets/js/e95cadfe.39c91cf0.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5277],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>h});var a=t(7294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var 
r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var n=c(e.components);return a.createElement(l.Provider,{value:n},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},u=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,r=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),d=c(t),u=i,h=d["".concat(l,".").concat(u)]||d[u]||m[u]||r;return t?a.createElement(h,o(o({ref:n},p),{},{components:t})):a.createElement(h,o({ref:n},p))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,o=new Array(r);o[0]=u;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[d]="string"==typeof e?e:i,o[1]=s;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>s,toc:()=>l});var a=t(7462),i=(t(7294),t(3905));const r={title:"Gene Fusion Detection"},o=void 0,s={unversionedId:"core-functionality/gene-fusions",id:"core-functionality/gene-fusions",title:"Gene Fusion Detection",description:"Overview",source:"@site/docs/core-functionality/gene-fusions.md",sourceDirName:"core-functionality",slug:"/core-functionality/gene-fusions",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/core-functionality/gene-fusions.md",tags:[],version:"current",frontMatter:{title:"Gene Fusion Detection"},sidebar:"docs",previous:{title:"Transcript Consequence Impact",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts"},next:{title:"Variant 
IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids"}},l=[{value:"Overview",id:"overview",children:[],level:2},{value:"Approach",id:"approach",children:[{value:"Variant Types",id:"variant-types",children:[],level:3},{value:"Criteria",id:"criteria",children:[],level:3}],level:2},{value:"ETV6/RUNX1 Example",id:"etv6runx1-example",children:[{value:"VCF",id:"vcf",children:[],level:3},{value:"JSON Output",id:"json-output",children:[{value:"Gene Fusion Data Sources",id:"gene-fusion-data-sources",children:[],level:4},{value:"Consequences",id:"consequences",children:[],level:4},{value:"Gene Fusions Section",id:"gene-fusions-section",children:[],level:4}],level:3}],level:2}],c={toc:l},p="wrapper";function d(e){let{components:n,...r}=e;return(0,i.kt)(p,(0,a.Z)({},c,r,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. 
When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed."),(0,i.kt)("p",null,"Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Illumina Connected Annotations."),(0,i.kt)("p",null,"The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(5886).Z})),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. ",(0,i.kt)("a",{parentName:"p",href:"https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-015-0252-1"},"Landscape of gene fusions in epithelial cancers: seq and ye shall find"),". Genome Med 7, 129 (2015)"))),(0,i.kt)("h2",{id:"approach"},"Approach"),(0,i.kt)("p",null,"Illumina Connected Annotations uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. Let's consider two transcripts, ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_014206.3")," (",(0,i.kt)("strong",{parentName:"p"},"TMEM258"),") and ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_013402.4")," (",(0,i.kt)("strong",{parentName:"p"},"FADS1"),"). 
Both of these genes are on the reverse strand in the genome. The vertical bar indicates the breakpoint where these transcripts are fused:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 transcripts",src:t(8532).Z})),(0,i.kt)("p",null,"The above explains where the transcripts are fused together, but it doesn't explain in which orientation. By using the directionality encoded in the translocation breakend, we can rearrange these two transcripts in four ways:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 gene fusions",src:t(1033).Z})),(0,i.kt)("p",null,"Only two of the combinations yields a fusion containing both the transcription start site (TSS) and the stop codon. In one case, we can even detect an in-frame gene fusion.\nIf only unidirectional gene fusions are desired, only these two fusions can be detected. If ",(0,i.kt)("inlineCode",{parentName:"p"},"enable-bidirectional-fusions")," is enabled, all four cases can be identified."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Interpreting translocation breakends")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"At first glance, translocation breakends are a bit daunting. However, once you understand how they work, they're actually quite simple. 
For more information, we recommend reading section 5.4 in the ",(0,i.kt)("a",{parentName:"p",href:"https://samtools.github.io/hts-specs/VCFv4.2.pdf"},"VCF 4.2 specification"),"."),(0,i.kt)("table",{parentName:"div"},(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"REF"),(0,i.kt)("th",{parentName:"tr",align:"left"},"ALT"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Meaning"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t[p["),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the right of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t]p]"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending left of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"]p]t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the left of p is joined before t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"[p[t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending right of p is joined before t")))))),(0,i.kt)("h3",{id:"variant-types"},"Variant Types"),(0,i.kt)("p",null,"Specifically we can identify gene fusions from the following structural variant types:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"deletions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"tandem_duplications (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"inversions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"translocation breakpoints 
(",(0,i.kt)("inlineCode",{parentName:"li"},"AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911["),") ")),(0,i.kt)("h3",{id:"criteria"},"Criteria"),(0,i.kt)("p",null,"The following criteria must be met for Illumina Connected Annotations to identify a gene fusion:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"After accounting for gene orientation and genomic rearrangements, both transcripts must have the same orientation if ",(0,i.kt)("inlineCode",{parentName:"li"},"enable-bidirectional-fusions")," is not enabled. They can have the same or different orientations if ",(0,i.kt)("inlineCode",{parentName:"li"},"enable-bidirectional-fusions")," is set."),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must belong to different genes"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts cannot have a coding region that already overlaps without the variant (i.e. in cases where two genes naturally overlap, we don't want to call a gene fusion)")),(0,i.kt)("h2",{id:"etv6runx1-example"},"ETV6/RUNX1 Example"),(0,i.kt)("p",null,"ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). 
Samples with this translocation are associated with a good prognosis and excellent response to treatment."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sun C., Chang L., Zhu X. ",(0,i.kt)("a",{parentName:"p",href:"https://www.oncotarget.com/article/16367/text/"},"Pathogenesis of ETV6/RUNX1-positive childhood acute lymphoblastic leukemia and mechanisms underlying its relapse"),". Oncotarget. 2017; 8: 35445-35459"))),(0,i.kt)("h3",{id:"vcf"},"VCF"),(0,i.kt)("p",null,"Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\nchr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND\nchr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND\nchr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND\nchr21 36420865 . C [chr12:12026270[C . 
PASS SVTYPE=BND\n")),(0,i.kt)("p",null,"When you put these calls together, the resulting genomic rearrangement looks something like this:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(7786).Z})),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)("p",null,"The annotation for the first variant in the VCF looks like this:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{139,141-205,218,220-230}","{139,141-205,218,220-230}":!0},'{\n "chromosome": "chr12",\n "position": 12026270,\n "refAllele": "C",\n "altAlleles": [\n "[chr21:36420865[C"\n ],\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "12p13.2",\n "clingen": [\n {\n "chromosome": "12",\n "begin": 173786,\n "end": 34835837,\n "variantType": "copy_number_gain",\n "id": "nsv995956",\n "clinicalInterpretation": "pathogenic",\n "phenotypes": [\n "Decreased calvarial ossification",\n "Delayed gross motor development",\n "Feeding difficulties",\n "Frontal bossing",\n "Morphological abnormality of the central nervous system",\n "Patchy alopecia"\n ],\n "phenotypeIds": [\n "HP:0002007",\n "HP:0002011",\n "HP:0002194",\n "HP:0002232",\n "HP:0005474",\n "HP:0011968",\n "MedGen:C0232466",\n "MedGen:C1862862",\n "MedGen:CN001816",\n "MedGen:CN001820",\n "MedGen:CN001989",\n "MedGen:CN004852"\n ],\n "observedGains": 1,\n "validated": true\n }\n ],\n "variants": [\n {\n "vid": "12-12026270-C-[chr21:36420865[C",\n "chromosome": "chr12",\n "begin": 12026270,\n "end": 12026270,\n "isStructuralVariant": true,\n "refAllele": "C",\n "altAllele": "[chr21:36420865[C",\n "variantType": "translocation_breakend",\n "cosmicGeneFusions": [\n {\n "id": "COSF2245",\n "numSamples": 249,\n "geneSymbols": [\n "ETV6",\n "RUNX1"\n ],\n "hgvsr": "ENST00000396373.4(ETV6):r.1_1283::ENST00000300305.3(RUNX1):r.504_6222",\n "histologies": [\n {\n "name": "acute lymphoblastic B cell leukaemia",\n "numSamples": 169\n },\n {\n "name": "acute lymphoblastic leukaemia",\n "numSamples": 80\n }\n ],\n "sites": 
[\n {\n "name": "haematopoietic and lymphoid tissue",\n "numSamples": 249\n }\n ],\n "pubMedIds": [\n 7761424,\n 7780150,\n 8609706,\n 8751464,\n 8982044,\n 9067587,\n 9207408,\n 9226156,\n 9628428,\n 10463610,\n 10774753,\n 11091202,\n 12621238,\n 12661004,\n 12750722,\n 15104290,\n 15642392,\n 24557455,\n 26925663\n ]\n }\n ],\n "fusionCatcher": [\n {\n "genes": {\n "first": {\n "hgnc": "ETV6",\n "isOncogene": true\n },\n "second": {\n "hgnc": "RUNX1",\n "isOncogene": true\n }\n },\n "somaticSources": [\n "DepMap CCLE",\n "Cancer Genome Project",\n "ChimerKB 4.0",\n "ChimerPub 4.0",\n "ChimerSeq 4.0",\n "Known",\n "Mitelman DB",\n "OncoKB",\n "TICdb"\n ]\n }\n ],\n "transcripts": [\n {\n "transcript": "ENST00000396373.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "ENSG00000139083",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "ENST00000437180.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000437180.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000300305.3",\n "bioType": "protein_coding",\n "intron": 1,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000300305.3(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000482318.1",\n "bioType": "nonsense_mediated_decay",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000482318.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000486278.2",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000486278.2(RUNX1):r.?_-15+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n 
"directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000455571.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000455571.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000475045.2",\n "bioType": "protein_coding",\n "intron": 11,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000475045.2(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000416754.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000416754.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n }\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000379658.3"\n },\n {\n "transcript": "NM_001987.4",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "2120",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n }\n ],\n "isCanonical": true,\n "proteinId": "NP_001978.1"\n }\n ]\n }\n ]\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript 
ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,i.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"exon that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"intron that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. 
MSH6")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA nomenclature")))),(0,i.kt)("h4",{id:"gene-fusion-data-sources"},"Gene Fusion Data Sources"),(0,i.kt)("p",null,"To provide more context to our gene fusions, we provide the following gene fusion data sources:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/cosmic"},"COSMIC")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/fusioncatcher"},"FusionCatcher"))),(0,i.kt)("h4",{id:"consequences"},"Consequences"),(0,i.kt)("p",null,"When a gene fusion is identified, we add the following Sequence Ontology consequence:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{3}","{3}":!0},' "consequence": [\n "transcript_variant",\n "gene_fusion"\n ],\n')),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"If both transcripts have the same orientation, we label it as ",(0,i.kt)("inlineCode",{parentName:"li"},"unidirectional_gene_fusion"),", if they have different orientations, we label it as ",(0,i.kt)("inlineCode",{parentName:"li"},"bidirectional_gene_fusion")),(0,i.kt)("li",{parentName:"ul"},"If both unidirectional and bidirectional ones are detected, we label it as ",(0,i.kt)("inlineCode",{parentName:"li"},"gene_fusion"),".")),(0,i.kt)("h4",{id:"gene-fusions-section"},"Gene Fusions Section"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"geneFusions")," section is contained within the object of the originating transcript. It will contain all the pairwise gene fusions that obey the criteria outline above. In the case of ",(0,i.kt)("inlineCode",{parentName:"p"},"ENST00000396373.4"),", there 7 other Ensembl transcripts that would produce a gene fusion. 
For ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4"),", there was only one transcript (",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4"),") that produce a gene fusion."),(0,i.kt)("p",null,"For each originating transcript, we report the following for each partner transcript:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"transcript ID"),(0,i.kt)("li",{parentName:"ul"},"gene ID"),(0,i.kt)("li",{parentName:"ul"},"HGNC gene symbol"),(0,i.kt)("li",{parentName:"ul"},"transcript bio type (e.g. protein_coding)"),(0,i.kt)("li",{parentName:"ul"},"intron or exon number containing the breakpoint"),(0,i.kt)("li",{parentName:"ul"},"HGVS RNA notation"),(0,i.kt)("li",{parentName:"ul"},"gene fusion directionality")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Before Illumina Connected Annotations 3.15, we provided HGVS coding notation. However, HGVS r. 
notation is more appropriate for these types fusion splicing events (see ",(0,i.kt)("a",{parentName:"p",href:"https://varnomen.hgvs.org/bg-material/consultation/svd-wg007"},"HGVS SVD-WG007"),")."))),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{8}","{8}":!0},' "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n }\n ],\n')),(0,i.kt)("p",null,"The HGVS RNA notation above indicates that the gene fusion starts with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4")," (RUNX1) until CDS position 58 and continues with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4")," (ETV6). ",(0,i.kt)("inlineCode",{parentName:"p"},"1009+3367")," indicates that the fusion occurred 3367 bp within intron 2."))}d.isMDXComponent=!0},1033:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_GeneFusions-e5e3758ea9d2c07d3591e3801b2bf7e3.svg"},8532:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_Transcripts-fe1b9c6be1f7cbfefbce887f8cec5d58.svg"},7786:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/etv6-runx1-fusion-ec8f4312c9aca496bde0d6e2b1bbd50d.svg"},5886:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/gene-fusions-fig2-1cce8ac31b00465c8d36bdc47ec3309e.svg"}}]); \ No newline at end of file diff --git a/assets/js/e95cadfe.a0a8d70b.js b/assets/js/e95cadfe.a0a8d70b.js deleted file mode 100644 index 09c863db..00000000 --- a/assets/js/e95cadfe.a0a8d70b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5277],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>h});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var 
t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function o(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var l=a.createContext({}),c=function(e){var n=a.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var n=c(e.components);return a.createElement(l.Provider,{value:n},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},u=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,r=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),d=c(t),u=i,h=d["".concat(l,".").concat(u)]||d[u]||m[u]||r;return t?a.createElement(h,o(o({ref:n},p),{},{components:t})):a.createElement(h,o({ref:n},p))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var r=t.length,o=new Array(r);o[0]=u;var s={};for(var l in n)hasOwnProperty.call(n,l)&&(s[l]=n[l]);s.originalType=e,s[d]="string"==typeof e?e:i,o[1]=s;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>d,frontMatter:()=>r,metadata:()=>s,toc:()=>l});var a=t(87462),i=(t(67294),t(3905));const r={title:"Gene Fusion Detection"},o=void 0,s={unversionedId:"core-functionality/gene-fusions",id:"core-functionality/gene-fusions",title:"Gene Fusion 
Detection",description:"Overview",source:"@site/docs/core-functionality/gene-fusions.md",sourceDirName:"core-functionality",slug:"/core-functionality/gene-fusions",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/core-functionality/gene-fusions.md",tags:[],version:"current",frontMatter:{title:"Gene Fusion Detection"},sidebar:"docs",previous:{title:"Transcript Consequence Impact",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts"},next:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids"}},l=[{value:"Overview",id:"overview",children:[],level:2},{value:"Approach",id:"approach",children:[{value:"Variant Types",id:"variant-types",children:[],level:3},{value:"Criteria",id:"criteria",children:[],level:3}],level:2},{value:"ETV6/RUNX1 Example",id:"etv6runx1-example",children:[{value:"VCF",id:"vcf",children:[],level:3},{value:"JSON Output",id:"json-output",children:[{value:"Gene Fusion Data Sources",id:"gene-fusion-data-sources",children:[],level:4},{value:"Consequences",id:"consequences",children:[],level:4},{value:"Gene Fusions Section",id:"gene-fusions-section",children:[],level:4}],level:3}],level:2}],c={toc:l},p="wrapper";function d(e){let{components:n,...r}=e;return(0,i.kt)(p,(0,a.Z)({},c,r,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"Gene fusions often result from large genomic rearrangements such as structural variants. While WGS secondary analysis pipelines typically contain alignment and variant calling stages, very few of them contain dedicated gene fusion callers. 
When they are included, they are usually associated with RNA-Seq pipelines where gene fusions can be readily observed."),(0,i.kt)("p",null,"Since gene fusions are frequently observed in cancer and since many sequencing experiments do not include paired RNA-Seq data, we have added gene fusion detection and annotation to Illumina Connected Annotations."),(0,i.kt)("p",null,"The rich diversity in gene fusion architectures and their likely mechanisms can be seen below:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(76851).Z})),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Kumar-Sinha, C., Kalyana-Sundaram, S. & Chinnaiyan, A.M. ",(0,i.kt)("a",{parentName:"p",href:"https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-015-0252-1"},"Landscape of gene fusions in epithelial cancers: seq and ye shall find"),". Genome Med 7, 129 (2015)"))),(0,i.kt)("h2",{id:"approach"},"Approach"),(0,i.kt)("p",null,"Illumina Connected Annotations uses structural variant calls to evaluate if they form either putative intra-chromosomal or inter-chromosomal gene fusions. Let's consider two transcripts, ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_014206.3")," (",(0,i.kt)("strong",{parentName:"p"},"TMEM258"),") and ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_013402.4")," (",(0,i.kt)("strong",{parentName:"p"},"FADS1"),"). 
Both of these genes are on the reverse strand in the genome. The vertical bar indicates the breakpoint where these transcripts are fused:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 transcripts",src:t(97309).Z})),(0,i.kt)("p",null,"The above explains where the transcripts are fused together, but it doesn't explain in which orientation. By using the directionality encoded in the translocation breakend, we can rearrange these two transcripts in four ways:"),(0,i.kt)("p",null,(0,i.kt)("img",{alt:"TMEM258 & FADS1 gene fusions",src:t(62434).Z})),(0,i.kt)("p",null,"Only two of the combinations yields a fusion containing both the transcription start site (TSS) and the stop codon. In one case, we can even detect an in-frame gene fusion.\nIf only unidirectional gene fusions are desired, only these two fusions can be detected. If ",(0,i.kt)("inlineCode",{parentName:"p"},"enable-bidirectional-fusions")," is enabled, all four cases can be identified."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Interpreting translocation breakends")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"At first glance, translocation breakends are a bit daunting. However, once you understand how they work, they're actually quite simple. 
For more information, we recommend reading section 5.4 in the ",(0,i.kt)("a",{parentName:"p",href:"https://samtools.github.io/hts-specs/VCFv4.2.pdf"},"VCF 4.2 specification"),"."),(0,i.kt)("table",{parentName:"div"},(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"REF"),(0,i.kt)("th",{parentName:"tr",align:"left"},"ALT"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Meaning"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t[p["),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the right of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"t]p]"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending left of p is joined after t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"]p]t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"piece extending to the left of p is joined before t")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"s"),(0,i.kt)("td",{parentName:"tr",align:"left"},"[p[t"),(0,i.kt)("td",{parentName:"tr",align:"left"},"reverse comp piece extending right of p is joined before t")))))),(0,i.kt)("h3",{id:"variant-types"},"Variant Types"),(0,i.kt)("p",null,"Specifically we can identify gene fusions from the following structural variant types:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"deletions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"tandem_duplications (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"inversions (",(0,i.kt)("inlineCode",{parentName:"li"},""),")"),(0,i.kt)("li",{parentName:"ul"},"translocation breakpoints 
(",(0,i.kt)("inlineCode",{parentName:"li"},"AAAAAAAAAAAAAAAAAATTAGTCAGGCAC[chr3:153444911["),") ")),(0,i.kt)("h3",{id:"criteria"},"Criteria"),(0,i.kt)("p",null,"The following criteria must be met for Illumina Connected Annotations to identify a gene fusion:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},"After accounting for gene orientation and genomic rearrangements, both transcripts must have the same orientation if ",(0,i.kt)("inlineCode",{parentName:"li"},"enable-bidirectional-fusions")," is not enabled. They can have the same or different orientations if ",(0,i.kt)("inlineCode",{parentName:"li"},"enable-bidirectional-fusions")," is set."),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must be from the same transcript source (i.e. we won't mix and match between RefSeq and Ensembl transcripts)"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts must belong to different genes"),(0,i.kt)("li",{parentName:"ol"},"Both transcripts cannot have a coding region that already overlaps without the variant (i.e. in cases where two genes naturally overlap, we don't want to call a gene fusion)")),(0,i.kt)("h2",{id:"etv6runx1-example"},"ETV6/RUNX1 Example"),(0,i.kt)("p",null,"ETV6/RUNX1 is the most common gene fusion in childhood B-cell precursor acute lymphoblastic leukemia (ALL). 
Samples with this translocation are associated with a good prognosis and excellent response to treatment."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sun C., Chang L., Zhu X. ",(0,i.kt)("a",{parentName:"p",href:"https://www.oncotarget.com/article/16367/text/"},"Pathogenesis of ETV6/RUNX1-positive childhood acute lymphoblastic leukemia and mechanisms underlying its relapse"),". Oncotarget. 2017; 8: 35445-35459"))),(0,i.kt)("h3",{id:"vcf"},"VCF"),(0,i.kt)("p",null,"Here's a simplified representation of the translocation breakends called by the Manta structural variant caller:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"##fileformat=VCFv4.1\n#CHROM POS ID REF ALT QUAL FILTER INFO\nchr12 12026270 . C [chr21:36420865[C . PASS SVTYPE=BND\nchr12 12026305 . A A]chr21:36420571] . PASS SVTYPE=BND\nchr21 36420571 . C C]chr12:12026305] . PASS SVTYPE=BND\nchr21 36420865 . C [chr12:12026270[C . 
PASS SVTYPE=BND\n")),(0,i.kt)("p",null,"When you put these calls together, the resulting genomic rearrangement looks something like this:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(53299).Z})),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)("p",null,"The annotation for the first variant in the VCF looks like this:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{139,141-205,218,220-230}","{139,141-205,218,220-230}":!0},'{\n "chromosome": "chr12",\n "position": 12026270,\n "refAllele": "C",\n "altAlleles": [\n "[chr21:36420865[C"\n ],\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "12p13.2",\n "clingen": [\n {\n "chromosome": "12",\n "begin": 173786,\n "end": 34835837,\n "variantType": "copy_number_gain",\n "id": "nsv995956",\n "clinicalInterpretation": "pathogenic",\n "phenotypes": [\n "Decreased calvarial ossification",\n "Delayed gross motor development",\n "Feeding difficulties",\n "Frontal bossing",\n "Morphological abnormality of the central nervous system",\n "Patchy alopecia"\n ],\n "phenotypeIds": [\n "HP:0002007",\n "HP:0002011",\n "HP:0002194",\n "HP:0002232",\n "HP:0005474",\n "HP:0011968",\n "MedGen:C0232466",\n "MedGen:C1862862",\n "MedGen:CN001816",\n "MedGen:CN001820",\n "MedGen:CN001989",\n "MedGen:CN004852"\n ],\n "observedGains": 1,\n "validated": true\n }\n ],\n "variants": [\n {\n "vid": "12-12026270-C-[chr21:36420865[C",\n "chromosome": "chr12",\n "begin": 12026270,\n "end": 12026270,\n "isStructuralVariant": true,\n "refAllele": "C",\n "altAllele": "[chr21:36420865[C",\n "variantType": "translocation_breakend",\n "cosmicGeneFusions": [\n {\n "id": "COSF2245",\n "numSamples": 249,\n "geneSymbols": [\n "ETV6",\n "RUNX1"\n ],\n "hgvsr": "ENST00000396373.4(ETV6):r.1_1283::ENST00000300305.3(RUNX1):r.504_6222",\n "histologies": [\n {\n "name": "acute lymphoblastic B cell leukaemia",\n "numSamples": 169\n },\n {\n "name": "acute lymphoblastic leukaemia",\n "numSamples": 80\n }\n ],\n "sites": 
[\n {\n "name": "haematopoietic and lymphoid tissue",\n "numSamples": 249\n }\n ],\n "pubMedIds": [\n 7761424,\n 7780150,\n 8609706,\n 8751464,\n 8982044,\n 9067587,\n 9207408,\n 9226156,\n 9628428,\n 10463610,\n 10774753,\n 11091202,\n 12621238,\n 12661004,\n 12750722,\n 15104290,\n 15642392,\n 24557455,\n 26925663\n ]\n }\n ],\n "fusionCatcher": [\n {\n "genes": {\n "first": {\n "hgnc": "ETV6",\n "isOncogene": true\n },\n "second": {\n "hgnc": "RUNX1",\n "isOncogene": true\n }\n },\n "somaticSources": [\n "DepMap CCLE",\n "Cancer Genome Project",\n "ChimerKB 4.0",\n "ChimerPub 4.0",\n "ChimerSeq 4.0",\n "Known",\n "Mitelman DB",\n "OncoKB",\n "TICdb"\n ]\n }\n ],\n "transcripts": [\n {\n "transcript": "ENST00000396373.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "ENSG00000139083",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "ENST00000437180.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000437180.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000300305.3",\n "bioType": "protein_coding",\n "intron": 1,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000300305.3(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000482318.1",\n "bioType": "nonsense_mediated_decay",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000482318.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000486278.2",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000486278.2(RUNX1):r.?_-15+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n 
"directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000455571.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000455571.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000475045.2",\n "bioType": "protein_coding",\n "intron": 11,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000475045.2(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n },\n {\n "transcript": "ENST00000416754.1",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "ENSG00000159216",\n "hgnc": "RUNX1",\n "hgvsr": "ENST00000416754.1(RUNX1):r.?_58+274::ENST00000396373.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n }\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000379658.3"\n },\n {\n "transcript": "NM_001987.4",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "introns": "5/7",\n "geneId": "2120",\n "hgnc": "ETV6",\n "consequence": [\n "transcript_variant",\n "unidirectional_gene_fusion"\n ],\n "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n }\n ],\n "isCanonical": true,\n "proteinId": "NP_001978.1"\n }\n ]\n }\n ]\n}\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"transcript 
ID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"bioType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"descriptions of the ",(0,i.kt)("a",{parentName:"td",href:"https://uswest.ensembl.org/info/genome/genebuild/biotypes.html"},"biotypes from Ensembl"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"exon"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"exon that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"intron"),(0,i.kt)("td",{parentName:"tr",align:"center"},"int"),(0,i.kt)("td",{parentName:"tr",align:"left"},"intron that contained fusion breakpoint")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"geneId"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene ID. e.g. ENSG00000116062")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. 
MSH6")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hgvsr"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"HGVS RNA nomenclature")))),(0,i.kt)("h4",{id:"gene-fusion-data-sources"},"Gene Fusion Data Sources"),(0,i.kt)("p",null,"To provide more context to our gene fusions, we provide the following gene fusion data sources:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/cosmic"},"COSMIC")),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"../data-sources/fusioncatcher"},"FusionCatcher"))),(0,i.kt)("h4",{id:"consequences"},"Consequences"),(0,i.kt)("p",null,"When a gene fusion is identified, we add the following Sequence Ontology consequence:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{3}","{3}":!0},' "consequence": [\n "transcript_variant",\n "gene_fusion"\n ],\n')),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"If both transcripts have the same orientation, we label it as ",(0,i.kt)("inlineCode",{parentName:"li"},"unidirectional_gene_fusion"),", if they have different orientations, we label it as ",(0,i.kt)("inlineCode",{parentName:"li"},"bidirectional_gene_fusion")),(0,i.kt)("li",{parentName:"ul"},"If both unidirectional and bidirectional ones are detected, we label it as ",(0,i.kt)("inlineCode",{parentName:"li"},"gene_fusion"),".")),(0,i.kt)("h4",{id:"gene-fusions-section"},"Gene Fusions Section"),(0,i.kt)("p",null,"The ",(0,i.kt)("inlineCode",{parentName:"p"},"geneFusions")," section is contained within the object of the originating transcript. It will contain all the pairwise gene fusions that obey the criteria outline above. In the case of ",(0,i.kt)("inlineCode",{parentName:"p"},"ENST00000396373.4"),", there 7 other Ensembl transcripts that would produce a gene fusion. 
For ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4"),", there was only one transcript (",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4"),") that produce a gene fusion."),(0,i.kt)("p",null,"For each originating transcript, we report the following for each partner transcript:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"transcript ID"),(0,i.kt)("li",{parentName:"ul"},"gene ID"),(0,i.kt)("li",{parentName:"ul"},"HGNC gene symbol"),(0,i.kt)("li",{parentName:"ul"},"transcript bio type (e.g. protein_coding)"),(0,i.kt)("li",{parentName:"ul"},"intron or exon number containing the breakpoint"),(0,i.kt)("li",{parentName:"ul"},"HGVS RNA notation"),(0,i.kt)("li",{parentName:"ul"},"gene fusion directionality")),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Before Illumina Connected Annotations 3.15, we provided HGVS coding notation. However, HGVS r. 
notation is more appropriate for these types fusion splicing events (see ",(0,i.kt)("a",{parentName:"p",href:"https://varnomen.hgvs.org/bg-material/consultation/svd-wg007"},"HGVS SVD-WG007"),")."))),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json",metastring:"{8}","{8}":!0},' "geneFusions": [\n {\n "transcript": "NM_001754.4",\n "bioType": "protein_coding",\n "intron": 2,\n "geneId": "861",\n "hgnc": "RUNX1",\n "hgvsr": "NM_001754.4(RUNX1):r.?_58+274::NM_001987.4(ETV6):r.1009+3367_?",\n "directionality":"uniDirectional"\n }\n ],\n')),(0,i.kt)("p",null,"The HGVS RNA notation above indicates that the gene fusion starts with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001754.4")," (RUNX1) until CDS position 58 and continues with ",(0,i.kt)("inlineCode",{parentName:"p"},"NM_001987.4")," (ETV6). ",(0,i.kt)("inlineCode",{parentName:"p"},"1009+3367")," indicates that the fusion occurred 3367 bp within intron 2."))}d.isMDXComponent=!0},62434:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_GeneFusions-e5e3758ea9d2c07d3591e3801b2bf7e3.svg"},97309:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/TMEM258_FADS1_Transcripts-fe1b9c6be1f7cbfefbce887f8cec5d58.svg"},53299:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/etv6-runx1-fusion-ec8f4312c9aca496bde0d6e2b1bbd50d.svg"},76851:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/images/gene-fusions-fig2-1cce8ac31b00465c8d36bdc47ec3309e.svg"}}]); \ No newline at end of file diff --git a/assets/js/ea458ac3.7253e2dd.js b/assets/js/ea458ac3.7253e2dd.js deleted file mode 100644 index 6f03247c..00000000 --- a/assets/js/ea458ac3.7253e2dd.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8872,5919,8823],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>h});var n=a(67294);function i(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function 
r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(i[a]=e[a]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(i[a]=e[a])}return i}var s=n.createContext({}),m=function(e){var t=n.useContext(s),a=t;return e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},d=function(e){var t=m(e.components);return n.createElement(s.Provider,{value:t},e.children)},p="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),p=m(a),u=i,h=p["".concat(s,".").concat(u)]||p[u]||c[u]||r;return a?n.createElement(h,o(o({ref:t},d),{},{components:a})):n.createElement(h,o({ref:t},d))}));function h(e,t){var a=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=a.length,o=new Array(r);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:i,o[1]=l;for(var m=2;m{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.17/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.17/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n "status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"associated 
diseases")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"status"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"predicted pathogenicity")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MitoTIP score")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}p.isMDXComponent=!0},73356:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const 
r={},o=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.17/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.17",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n 
}\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"end"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}p.isMDXComponent=!0},29802:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>l,metadata:()=>m,toc:()=>d});var n=a(87462),i=(a(67294),a(3905)),r=a(95584),o=a(73356);const l={title:"MITOMAP"},s=void 0,m={unversionedId:"data-sources/mitomap",id:"version-3.17/data-sources/mitomap",title:"MITOMAP",description:"Overview",source:"@site/versioned_docs/version-3.17/data-sources/mitomap.mdx",sourceDirName:"data-sources",slug:"/data-sources/mitomap",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mitomap",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/mitomap.mdx",tags:[],version:"3.17",frontMatter:{title:"MITOMAP"},sidebar:"version-3.17/docs",previous:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mito-heteroplasmy"},next:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/omim"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Scraping HTML Pages",id:"scraping-html-pages",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Allele Parsing",id:"allele-parsing",children:[],level:4}],level:3}],level:2},{value:"PostgreSQL Dump File",id:"postgresql-dump-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[{value:"Small Variants",id:"small-variants",children:[],level:3},{value:"Structural Variants",id:"structural-variants",children:[],level:3}],level:2}],p={toc:d},c="wrapper";function 
u(e){let{components:t,...l}=e;return(0,i.kt)(c,(0,n.Z)({},p,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"MITOMAP provides a compendium of polymorphisms and mutations in human mitochondrial DNA."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. ",(0,i.kt)("em",{parentName:"p"},"Current Protocols in Bioinformatics")," 1(123):1.23.1-26 (2013). ",(0,i.kt)("a",{parentName:"p",href:"http://www.mitomap.org"},"http://www.mitomap.org")))),(0,i.kt)("h2",{id:"scraping-html-pages"},"Scraping HTML Pages"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"MITOMAP is unique in that it doesn't offer the data in a downloadable format. 
As a result, the annotation content in Nirvana is scraped from the following MITOMAP pages:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsControl"},"mtDNA Control Region Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsCoding"},"mtDNA Coding Region & RNA Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsRNA"},"Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsCodingControl"},"Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/DeletionsSingle"},"Reported mtDNA Deletions")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/InsertionsSimple"},"mtDNA Simple Insertions"))),(0,i.kt)("p",null,(0,i.kt)("img",{src:a(75439).Z})),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"Here's what the HTML code looks like:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-html"},"[\"582\",\"MT-TF\",\"Mitochondrial myopathy\",\"T582C\",\"tRNA Phe\",\"-\",\"+\",\"Reported\",\"72.90% \",\"0\",\"2\"],\n[\"583\",\"MT-TF\",\"MELAS / MM & EXIT\",\"G583A\",\"tRNA Phe\",\"-\",\"+\",\"Cfrm\",\"93.10% \",\"0\",\"3\"],\n")),(0,i.kt)("p",null,"We're mainly interested in the following columns (numbers indicate the HTML page 
above):"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Position",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Disease",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Nucleotide Change",(0,i.kt)("sup",null,"1,2")),(0,i.kt)("li",{parentName:"ul"},"Allele",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Homoplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Heteroplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Status",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"MitoTIP",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"GB Seqs FL(CR)",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Deletion Junction",(0,i.kt)("sup",null,"5")),(0,i.kt)("li",{parentName:"ul"},"Insert (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"Insert Point (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"References/Curated References",(0,i.kt)("sup",null,"1,2,3,4"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"MitoTIP")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The MitoTIP information is used to populate the ",(0,i.kt)("inlineCode",{parentName:"p"},"clinicalSignificance")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"scorePercentile"),' JSON keys. 
The "frequency alert" entries are skipped since it\'s not directly relevant to clinical significance.'))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Left alignment")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Variant Enumeration")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are ",(0,i.kt)("inlineCode",{parentName:"p"},"C-C(2-8)")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"A-AC or ACC"),". 
Alternate alleles containing IUPAC ambiguity codes are similarly enumerated."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Inversions")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"MITOMAP inversions are currently treated as MNVs."))),(0,i.kt)("h4",{id:"allele-parsing"},"Allele Parsing"),(0,i.kt)("p",null,"The following MITOMAP allele parsing conventions are supported:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"C123T"),(0,i.kt)("li",{parentName:"ul"},"16021_16022del"),(0,i.kt)("li",{parentName:"ul"},"8042del2"),(0,i.kt)("li",{parentName:"ul"},"C9537insC"),(0,i.kt)("li",{parentName:"ul"},"3902_3908invACCTTGC"),(0,i.kt)("li",{parentName:"ul"},"A-AC or ACC"),(0,i.kt)("li",{parentName:"ul"},"C-C(2-8)"),(0,i.kt)("li",{parentName:"ul"},"8042delAT")),(0,i.kt)("h2",{id:"postgresql-dump-file"},"PostgreSQL Dump File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;\n1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . 
Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177\n2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. 
The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534\n")),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"id"),(0,i.kt)("li",{parentName:"ul"},"nlmid")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Why not use the PostgreSQL file for everything?")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in."))),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 
1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Duplicated records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown."),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"For diseases and PubMed IDs, we take the union of the values in the duplicated records."),(0,i.kt)("li",{parentName:"ul"},"For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.")))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Skipped records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Records that represent an alternate notation of the original variant are skipped. 
Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped."))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"see ",(0,i.kt)("a",{parentName:"li",href:"#example"},"HTML Pages")," above"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/downloads/mitomap.dump.sql.gz"},"PostgreSQL dump file"))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("h3",{id:"small-variants"},"Small Variants"),(0,i.kt)(r.default,{mdxType:"SmallJSON"}),(0,i.kt)("h3",{id:"structural-variants"},"Structural Variants"),(0,i.kt)(o.default,{mdxType:"SVJSON"}))}u.isMDXComponent=!0},75439:(e,t,a)=>{a.d(t,{Z:()=>n});const n=a.p+"assets/images/MITOMAP-d8d4dd35c2336fdba5fcced77ec438e6.png"}}]); \ No newline at end of file diff --git a/assets/js/eb44b3a3.bb705757.js b/assets/js/eb44b3a3.bb705757.js deleted file mode 100644 index 2e89dda3..00000000 --- a/assets/js/eb44b3a3.bb705757.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9552],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>h});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),c=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},d=function(e){var t=c(e.components);return 
a.createElement(s.Provider,{value:t},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),u=c(n),m=i,h=u["".concat(s,".").concat(m)]||u[m]||p[m]||r;return n?a.createElement(h,o(o({ref:t},d),{},{components:n})):a.createElement(h,o({ref:t},d))}));function h(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=n.length,o=new Array(r);o[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:i,o[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const r={title:"Getting Started"},o=void 0,l={unversionedId:"introduction/getting-started",id:"version-3.16/introduction/getting-started",title:"Getting Started",description:"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.",source:"@site/versioned_docs/version-3.16/introduction/getting-started.md",sourceDirName:"introduction",slug:"/introduction/getting-started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/getting-started",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/introduction/getting-started.md",tags:[],version:"3.16",frontMatter:{title:"Getting Started"},sidebar:"version-3.16/docs",previous:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/dependencies"},next:{title:"Parsing Nirvana JSON",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/parsing-json"}},s=[{value:"Quick Start",id:"quick-start",children:[],level:2},{value:"Getting Nirvana",id:"getting-nirvana",children:[{value:"Compile from Source",id:"compile-from-source",children:[],level:3},{value:"GitHub Release Notes",id:"github-release-notes",children:[],level:3},{value:"Docker",id:"docker",children:[],level:3}],level:2},{value:"Downloading the data files",id:"downloading-the-data-files",children:[],level:2},{value:"Download a test VCF file",id:"download-a-test-vcf-file",children:[],level:2},{value:"Running Nirvana",id:"running-nirvana",children:[],level:2}],c={toc:s},d="wrapper";function u(e){let{components:t,...n}=e;return(0,i.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"Nirvana is written in C# using ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core")," (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files."),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Nirvana currently uses .NET Core 3.1 or later. 
Please make sure that you have the most current runtime from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core downloads")," page."))),(0,i.kt)("h2",{id:"quick-start"},"Quick Start"),(0,i.kt)("p",null,"If you want to get started right away, we've created ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh"},"a script")," that downloads Nirvana, compiles it, and starts annotating a test file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/TestNirvana.sh\nbash ./TestNirvana.sh\n")),(0,i.kt)("p",null,"We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X."),(0,i.kt)("h2",{id:"getting-nirvana"},"Getting Nirvana"),(0,i.kt)("h3",{id:"compile-from-source"},"Compile from Source"),(0,i.kt)("p",null,"The following will grab the latest version of Nirvana from GitHub and compile it using the .NET Core compiler:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"git clone https://github.com/Illumina/Nirvana.git\ncd Nirvana\ndotnet build -c Release\n")),(0,i.kt)("h3",{id:"github-release-notes"},"GitHub Release Notes"),(0,i.kt)("p",null,"Alternatively, you can grab the latest binaries from our ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/Nirvana/releases"},"GitHub Releases")," page:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p Nirvana/Data\ncd Nirvana\nunzip Nirvana-3.16.1-dotnet-3.1.0.zip\n")),(0,i.kt)("h3",{id:"docker"},"Docker"),(0,i.kt)("p",null,"You can find us on ",(0,i.kt)("a",{parentName:"p",href:"https://hub.docker.com/repository/docker/annotation/nirvana"},"Docker Hub")," under ",(0,i.kt)("inlineCode",{parentName:"p"},"annotation/nirvana"),":"),(0,i.kt)("div",{className:"admonition admonition-caution alert 
alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"We think Docker is fantastic. However, because our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Nirvana in Docker."))),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p Nirvana/Data\ncd Nirvana\ndocker pull annotation/nirvana:3.14\n")),(0,i.kt)("p",null,"For Docker, we have special instructions for running the Downloader:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \\\n /opt/nirvana/Downloader.dll --ga GRCh37 -o /scratch\n")),(0,i.kt)("p",null,"Similarly, we have special instructions for running Nirvana (Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF")," in case you need it):"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"sudo docker run --rm -it -v Data:/scratch annotation/nirvana:3.14 dotnet \\\n /opt/nirvana/Nirvana.dll -c /scratch/Cache/GRCh37/Both \\\n -r /scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n --sd /scratch/SupplementaryAnnotation/GRCh37 \\\n -i /scratch/HiSeq.10000.vcf.gz -o 
/scratch/HiSeq\n")),(0,i.kt)("h2",{id:"downloading-the-data-files"},"Downloading the data files"),(0,i.kt)("p",null,"To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp3.1/Downloader.dll \\\n --ga GRCh37 \\\n -o Data\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--ga")," argument specifies the genome assembly which can be ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh37"),", ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh38"),", or ",(0,i.kt)("inlineCode",{parentName:"li"},"both"),"."),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Glitches in the Matrix")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. 
If you see that a file was marked ",(0,i.kt)("inlineCode",{parentName:"p"},"truncated"),", try fixing the root cause and running the downloader again."))),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"From time to time, you can re-run the Downloader to get the latest annotation files. 
It will only download the files that changed."))),(0,i.kt)("h2",{id:"download-a-test-vcf-file"},"Download a test VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.vcf.gz\n")),(0,i.kt)("h2",{id:"running-nirvana"},"Running Nirvana"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp3.1/Nirvana.dll \\\n -c Data/Cache/GRCh37/Both \\\n --sd Data/SupplementaryAnnotation/GRCh37 \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i HiSeq.10000.vcf.gz \\\n -o HiSeq.10000\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache prefix"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Nirvana, performance metrics are shown as it evaluates each chromosome in the input VCF file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nNirvana (c) 2021 Illumina, Inc.\nStromberg, Roy, Lajugie, 
Jiang, Li, and Kang 3.16.1\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:01.2\nSA Position Scan 00:00:00.1 55,270\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr1 00:00:00.1 00:00:01.5 6,323\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:01.3 23.9 %\nPreload 00:00:00.1 2.9 %\nAnnotation 00:00:01.5 27.2 %\n\nPeak memory usage: 1.434 GB\nTime: 00:00:05.2\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"HiSeq.10000.json.gz"),". Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/NirvanaDocumentation/files/HiSeq.10000.json.gz"},"the full JSON file"),"."))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/eb980efa.ffb50532.js b/assets/js/eb980efa.ffb50532.js deleted file mode 100644 index 0dce40ae..00000000 --- a/assets/js/eb980efa.ffb50532.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2134,4680,1562],{3905:(e,t,a)=>{a.d(t,{Zo:()=>d,kt:()=>h});var n=a(67294);function i(e,t,a){return t in e?Object.defineProperty(e,t,{value:a,enumerable:!0,configurable:!0,writable:!0}):e[t]=a,e}function r(e,t){var a=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);t&&(n=n.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),a.push.apply(a,n)}return a}function o(e){for(var t=1;t=0||(i[a]=e[a]);return i}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,a)&&(i[a]=e[a])}return i}var s=n.createContext({}),m=function(e){var t=n.useContext(s),a=t;return 
e&&(a="function"==typeof e?e(t):o(o({},t),e)),a},d=function(e){var t=m(e.components);return n.createElement(s.Provider,{value:t},e.children)},p="mdxType",c={inlineCode:"code",wrapper:function(e){var t=e.children;return n.createElement(n.Fragment,{},t)}},u=n.forwardRef((function(e,t){var a=e.components,i=e.mdxType,r=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),p=m(a),u=i,h=p["".concat(s,".").concat(u)]||p[u]||c[u]||r;return a?n.createElement(h,o(o({ref:t},d),{},{components:a})):n.createElement(h,o({ref:t},d))}));function h(e,t){var a=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var r=a.length,o=new Array(r);o[0]=u;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[p]="string"==typeof e?e:i,o[1]=l;for(var m=2;m{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-small-variants-json",id:"version-3.18/data-sources/mitomap-small-variants-json",title:"mitomap-small-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/mitomap-small-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-small-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mitomap-small-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/mitomap-small-variants-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],m={toc:s},d="wrapper";function p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "refAllele":"G",\n "altAllele":"A",\n "diseases":[ \n "Bipolar disorder",\n "Melanoma"\n ],\n "hasHomoplasmy":false,\n "hasHeteroplasmy":true,\n 
"status":"Reported",\n "clinicalSignificance":"confirmed pathogenic",\n "scorePercentile":83.30,\n "numGenBankFullLengthSeqs":2,\n "pubMedIds":["2316527","6299878","6301949"],\n "isAlleleSpecific":true\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"diseases"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"},"associated diseases")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHomoplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"hasHeteroplasmy"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"status"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"record status")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"clinicalSignificance"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"},"predicted 
pathogenicity")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"MitoTIP score")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"numGenBankFullLengthSeqs"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"},"# of GenBank full-length sequences")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,i.kt)("td",{parentName:"tr",align:"center"},"boolean"),(0,i.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the MITOMAP alternate allele")))))}p.isMDXComponent=!0},81167:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>o,default:()=>p,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var n=a(87462),i=(a(67294),a(3905));const r={},o=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.18/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.18",frontMatter:{}},s=[],m={toc:s},d="wrapper";function 
p(e){let{components:t,...a}=e;return(0,i.kt)(d,(0,n.Z)({},m,a,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,i.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,i.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"end"),(0,i.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,i.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,i.kt)("td",{parentName:"tr",align:"left"})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:"center"},"float"),(0,i.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}p.isMDXComponent=!0},83306:(e,t,a)=>{a.r(t),a.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>l,metadata:()=>m,toc:()=>d});var n=a(87462),i=(a(67294),a(3905)),r=a(91471),o=a(81167);const l={title:"MITOMAP"},s=void 0,m={unversionedId:"data-sources/mitomap",id:"version-3.18/data-sources/mitomap",title:"MITOMAP",description:"Overview",source:"@site/versioned_docs/version-3.18/data-sources/mitomap.mdx",sourceDirName:"data-sources",slug:"/data-sources/mitomap",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mitomap",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/mitomap.mdx",tags:[],version:"3.18",frontMatter:{title:"MITOMAP"},sidebar:"docs",previous:{title:"Mitochondrial Heteroplasmy",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/mito-heteroplasmy"},next:{title:"OMIM",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/omim"}},d=[{value:"Overview",id:"overview",children:[],level:2},{value:"Scraping HTML Pages",id:"scraping-html-pages",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[{value:"Allele Parsing",id:"allele-parsing",children:[],level:4}],level:3}],level:2},{value:"PostgreSQL Dump File",id:"postgresql-dump-file",children:[{value:"Example",id:"example-1",children:[],level:3},{value:"Parsing",id:"parsing-1",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URLs",id:"download-urls",children:[],level:2},{value:"JSON Output",id:"json-output",children:[{value:"Small Variants",id:"small-variants",children:[],level:3},{value:"Structural Variants",id:"structural-variants",children:[],level:3}],level:2}],p={toc:d},c="wrapper";function 
u(e){let{components:t,...l}=e;return(0,i.kt)(c,(0,n.Z)({},p,l,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"MITOMAP provides a compendium of polymorphisms and mutations in human mitochondrial DNA."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Lott, M.T., Leipzig, J.N., Derbeneva, O., Xie, H.M., Chalkia, D., Sarmady, M., Procaccio, V., and Wallace, D.C. mtDNA variation and analysis using MITOMAP and MITOMASTER. ",(0,i.kt)("em",{parentName:"p"},"Current Protocols in Bioinformatics")," 1(123):1.23.1-26 (2013). ",(0,i.kt)("a",{parentName:"p",href:"http://www.mitomap.org"},"http://www.mitomap.org")))),(0,i.kt)("h2",{id:"scraping-html-pages"},"Scraping HTML Pages"),(0,i.kt)("h3",{id:"example"},"Example"),(0,i.kt)("p",null,"MITOMAP is unique in that it doesn't offer the data in a downloadable format. 
As a result, the annotation content in Nirvana is scraped from the following MITOMAP pages:"),(0,i.kt)("ol",null,(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsControl"},"mtDNA Control Region Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/PolymorphismsCoding"},"mtDNA Coding Region & RNA Sequence Variants")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsRNA"},"Reported Mitochondrial DNA Base Substitution Diseases: rRNA/tRNA mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/MutationsCodingControl"},"Reported Mitochondrial DNA Base Substitution Diseases: Coding and Control Region Point Mutations")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/DeletionsSingle"},"Reported mtDNA Deletions")),(0,i.kt)("li",{parentName:"ol"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/foswiki/bin/view/MITOMAP/InsertionsSimple"},"mtDNA Simple Insertions"))),(0,i.kt)("p",null,(0,i.kt)("img",{src:a(94518).Z})),(0,i.kt)("h3",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"Here's what the HTML code looks like:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-html"},"[\"582\",\"MT-TF\",\"Mitochondrial myopathy\",\"T582C\",\"tRNA Phe\",\"-\",\"+\",\"Reported\",\"72.90% \",\"0\",\"2\"],\n[\"583\",\"MT-TF\",\"MELAS / MM & EXIT\",\"G583A\",\"tRNA Phe\",\"-\",\"+\",\"Cfrm\",\"93.10% \",\"0\",\"3\"],\n")),(0,i.kt)("p",null,"We're mainly interested in the following columns (numbers indicate the HTML page 
above):"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Position",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Disease",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Nucleotide Change",(0,i.kt)("sup",null,"1,2")),(0,i.kt)("li",{parentName:"ul"},"Allele",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Homoplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Heteroplasmy",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"Status",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"MitoTIP",(0,i.kt)("sup",null,"3,4")),(0,i.kt)("li",{parentName:"ul"},"GB Seqs FL(CR)",(0,i.kt)("sup",null,"1,2,3,4")),(0,i.kt)("li",{parentName:"ul"},"Deletion Junction",(0,i.kt)("sup",null,"5")),(0,i.kt)("li",{parentName:"ul"},"Insert (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"Insert Point (nt)",(0,i.kt)("sup",null,"6")),(0,i.kt)("li",{parentName:"ul"},"References/Curated References",(0,i.kt)("sup",null,"1,2,3,4"))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"MitoTIP")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"The MitoTIP information is used to populate the ",(0,i.kt)("inlineCode",{parentName:"p"},"clinicalSignificance")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"scorePercentile"),' JSON keys. 
The "frequency alert" entries are skipped since it\'s not directly relevant to clinical significance.'))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Left alignment")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Many of the variants in MITOMAP have not been normalized. As part of our import procedure, we left align all insertions and deletions."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Variant Enumeration")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Sometimes MITOMAP provides data that indicates that multiple values have been observed. Some examples of this are ",(0,i.kt)("inlineCode",{parentName:"p"},"C-C(2-8)")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"A-AC or ACC"),". 
Alternate alleles containing IUPAC ambiguity codes are similarly enumerated."))),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Inversions")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"MITOMAP inversions are currently treated as MNVs."))),(0,i.kt)("h4",{id:"allele-parsing"},"Allele Parsing"),(0,i.kt)("p",null,"The following MITOMAP allele parsing conventions are supported:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"C123T"),(0,i.kt)("li",{parentName:"ul"},"16021_16022del"),(0,i.kt)("li",{parentName:"ul"},"8042del2"),(0,i.kt)("li",{parentName:"ul"},"C9537insC"),(0,i.kt)("li",{parentName:"ul"},"3902_3908invACCTTGC"),(0,i.kt)("li",{parentName:"ul"},"A-AC or ACC"),(0,i.kt)("li",{parentName:"ul"},"C-C(2-8)"),(0,i.kt)("li",{parentName:"ul"},"8042delAT")),(0,i.kt)("h2",{id:"postgresql-dump-file"},"PostgreSQL Dump File"),(0,i.kt)("h3",{id:"example-1"},"Example"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"COPY mitomap.reference (id, authors, title, publication, editors, volume, number, pages, date, city, publisher, keywords, abstract, nlmid) FROM stdin;\n1 Albring, M., Griffith, J. and Attardi, G. Association of a protein structure of probable membrane derivation with HeLa cell mitochondrial DNA near its origin of replication Proceedings of the National Academy of Sciences of the United States of America . 74 4 1348-1352 1977 . . 
Deoxyribonucleoproteins; DNA Replication; DNA, Mitochondrial; Hela Cells; Membrane Proteins; Microscopy, Electron; Molecular Weight; Neoplasm Proteins; Protein Binding Almost all (about 95 percent) of the mitochondrial DNA molecules released by Triton X-100 lysis of HeLa cell mitochondria in the presence of 0.15 M salt are associated with a single protein-containing structure varying in appearance between a 10-20 nm knob and a 100-500 nm membrane-like patch. Analysis by high resolution electron microscopy and by polyacrylamide gel electrophoresis after cleavage of mitochondrial DNA with the endonucleases EcoRI, HindIII, and Hpa II has shown that the protein structure is attached to the DNA in the region of the D-loop, and probably near the origin of mitochondrial DNA replication. The data strongly suggest that HeLa cell mitochondrial DNA is attached in vivo to the inner mitochondrial membrane at or near the origin of replication, and that a membrane fragment of variable size remains associated with the DNA during the isolation. After sodium dodecyl sulfate extraction of mitochondrial DNA, a small 5-10 nm protein is found at the same site on a fraction of the mitochondrial DNA molecules. 266177\n2 Anderson, S., Bankier, A.T., Barrell, B.G., de Bruijn, M.H., Coulson, A.R., Drouin, J., Eperon, I.C., Nierlich, D.P., Roe, B.A., Sanger, F., Schreier, P.H., Smith, A.J., Staden, R., Young, I.G. Sequence and organization of the human mitochondrial genome Nature . 290 5806 457-465 1981 . . Base Sequence; Codon; DNA Replication; mtDNA; Evolution; Genes, Structural; Human; Nucleic Acid Precursors; Peptide Chain Initiation; Peptide Chain Termination; RNA, Ribosomal; RNA, Transfer; Transcription, Genetic The complete sequence of the 16,569-base pair human mitochondrial genome is presented. The genes for the 12S and 16S rRNAs, 22 tRNAs, cytochrome c oxidase subunits I, II and III, ATPase subunit 6, cytochrome b and eight other predicted protein coding genes have been located. 
The sequence shows extreme economy in that the genes have none or only a few noncoding bases between them, and in many cases the termination codons are not coded in the DNA but are created post- transcriptionally by polyadenylation of the mRNAs. 7219534\n")),(0,i.kt)("h3",{id:"parsing-1"},"Parsing"),(0,i.kt)("p",null,"From the PostgreSQL dump file, we're interested in parsing the mapping between reference IDs and the PubMed IDs:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"id"),(0,i.kt)("li",{parentName:"ul"},"nlmid")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Why not use the PostgreSQL file for everything?")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Ideally we would use this file for parsing all of our data, but the schema contains 80+ tables and we haven't invested the time yet to see how the tables are linked together to produce the 6 main HTML pages that we're interested in."))),(0,i.kt)("h2",{id:"known-issues"},"Known Issues"),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 
1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Duplicated records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Multiple records describing the same nucleotide change are merged into the same record. If any conflicting information is found (homoplasmy, heteroplasmy, status, clinical significance, score percentile, end coordinate, variant type), an exception is thrown."),(0,i.kt)("ul",{parentName:"div"},(0,i.kt)("li",{parentName:"ul"},"For diseases and PubMed IDs, we take the union of the values in the duplicated records."),(0,i.kt)("li",{parentName:"ul"},"For full length GenBank sequences, we take the largest number from each of the duplicated records since it provides the strongest evidence for this variant.")))),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Skipped records")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Records that represent an alternate notation of the original variant are skipped. 
Similarly some variants with confusing alleles (T961delT+ / -C(n)ins) are also skipped."))),(0,i.kt)("h2",{id:"download-urls"},"Download URLs"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"see ",(0,i.kt)("a",{parentName:"li",href:"#example"},"HTML Pages")," above"),(0,i.kt)("li",{parentName:"ul"},(0,i.kt)("a",{parentName:"li",href:"https://mitomap.org/downloads/mitomap.dump.sql.gz"},"PostgreSQL dump file"))),(0,i.kt)("h2",{id:"json-output"},"JSON Output"),(0,i.kt)("h3",{id:"small-variants"},"Small Variants"),(0,i.kt)(r.default,{mdxType:"SmallJSON"}),(0,i.kt)("h3",{id:"structural-variants"},"Structural Variants"),(0,i.kt)(o.default,{mdxType:"SVJSON"}))}u.isMDXComponent=!0},94518:(e,t,a)=>{a.d(t,{Z:()=>n});const n=a.p+"assets/images/MITOMAP-d8d4dd35c2336fdba5fcced77ec438e6.png"}}]); \ No newline at end of file diff --git a/assets/js/ec26a7d7.3aa58e8d.js b/assets/js/ec26a7d7.3aa58e8d.js deleted file mode 100644 index 9cc63c57..00000000 --- a/assets/js/ec26a7d7.3aa58e8d.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7245,8947],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>v});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function i(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function o(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var l=a.createContext({}),c=function(e){var t=a.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=c(e.components);return 
a.createElement(l.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,i=e.originalType,l=e.parentName,p=s(e,["components","mdxType","originalType","parentName"]),d=c(n),u=r,v=d["".concat(l,".").concat(u)]||d[u]||m[u]||i;return n?a.createElement(v,o(o({ref:t},p),{},{components:n})):a.createElement(v,o({ref:t},p))}));function v(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var i=n.length,o=new Array(i);o[0]=u;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s[d]="string"==typeof e?e:r,o[1]=s;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>d,frontMatter:()=>i,metadata:()=>s,toc:()=>l});var a=n(87462),r=(n(67294),n(3905));const i={},o=void 0,s={unversionedId:"data-sources/primate-ai-json",id:"version-3.16/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.16/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/primate-ai-json.md",tags:[],version:"3.16",frontMatter:{}},l=[],c={toc:l},p="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(p,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n 
}\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,r.kt)("td",{parentName:"tr",align:"center"},"float"),(0,r.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}d.isMDXComponent=!0},41791:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>m,frontMatter:()=>o,metadata:()=>l,toc:()=>c});var a=n(87462),r=(n(67294),n(3905)),i=n(3301);const o={title:"Primate AI"},s=void 0,l={unversionedId:"data-sources/primate-ai",id:"version-3.16/data-sources/primate-ai",title:"Primate AI",description:"Overview",source:"@site/versioned_docs/version-3.16/data-sources/primate-ai.mdx",sourceDirName:"data-sources",slug:"/data-sources/primate-ai",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/primate-ai",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/primate-ai.mdx",tags:[],version:"3.16",frontMatter:{title:"Primate AI"},sidebar:"version-3.16/docs",previous:{title:"PhyloP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/phylop"},next:{title:"REVEL",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/revel"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"TSV File",id:"tsv-file",children:[{value:"Example",id:"example",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Pre-processing",id:"pre-processing",children:[{value:"Converting 
UCSC IDs",id:"converting-ucsc-ids",children:[],level:3},{value:"Running the Pre-Processor",id:"running-the-pre-processor",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],p={toc:c},d="wrapper";function m(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"Primate AI is a deep residual neural network for classifying the pathogenicity of missense mutations. The method is described in the publication:"),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"Sundaram, L., Gao, H., Padigepati, S.R. et al. Predicting the clinical impact of human mutation with deep neural networks. ",(0,r.kt)("em",{parentName:"p"},"Nat Genet")," ",(0,r.kt)("strong",{parentName:"p"},"50"),", 1161\u20131170 (2018). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/s41588-018-0167-z"},"https://doi.org/10.1038/s41588-018-0167-z")))),(0,r.kt)("h2",{id:"tsv-file"},"TSV File"),(0,r.kt)("h3",{id:"example"},"Example"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"chr pos ref alt refAA altAA strand_1pos_0neg trinucleotide_context UCSC_gene ExAC_coverage primateDL_score\nchr10 1046704 C T R C 1 CCG uc001ift.3 45.49 0.849114537239\nchr10 1046704 C G R G 1 CCG uc001ift.3 45.49 0.795686006546\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the TSV file, we're mainly interested in the following columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"pos")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"ref")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"alt")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"primateDL_score"))),(0,r.kt)("p",null,"We also use ",(0,r.kt)("inlineCode",{parentName:"p"},"UCSC_gene")," to filter out variants that don't have matching gene models in Nirvana."),(0,r.kt)("h2",{id:"pre-processing"},"Pre-processing"),(0,r.kt)("h3",{id:"converting-ucsc-ids"},"Converting UCSC IDs"),(0,r.kt)("p",null,"Primate AI only provides UCSC IDs. 
As an initial pre-processing step, we'll need to convert these to either Entrez or Ensembl Gene IDs."),(0,r.kt)("p",null,"The following queries are used to download the conversions from UCSC:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},'mysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select * FROM knownToLocusLink;" hg19 > ucsc_locuslink.tsv\n\nmysql -h genome-mysql.soe.ucsc.edu -u genome -A -P 3306 \\\n -e "select knownToEnsembl.name, knownToEnsembl.value, ensGene.name2 FROM knownToEnsembl, ensGene WHERE knownToEnsembl.value = ensGene.name;" \\\n hg19 > ucsc_ensembl.tsv\n')),(0,r.kt)("h3",{id:"running-the-pre-processor"},"Running the Pre-Processor"),(0,r.kt)("p",null,"The Primate AI pre-processor can be run as follows:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet PrimateAiPreProcessor.dll UGA_develop.tsv PrimateAI_scores_v0.2.tsv.gz \\\n ucsc_locuslink.tsv ucsc_ensembl.tsv PrimateAI_0.2_GRCh37.tsv.gz\n")),(0,r.kt)("p",null,"During conversion, 0.5% of the UCSC Ids cannot be converted to either Entrez or Ensembl gene IDs. Once the gene IDs have been acquired, we check to see which are available in Nirvana."),(0,r.kt)("p",null,"The following Entrez Gene IDs were not found:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"399753\n401980\n504189\n504191\n100293534\n")),(0,r.kt)("p",null,"Here is the output from the pre-processor:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-scss"},"- loading UCSC to Entrez Gene ID dictionary... 73,432 genes loaded.\n- loading UCSC to Ensembl Gene ID dictionary... 76,178 genes loaded.\n- loading UGA gene ID to gene dictionary... 103,277 genes loaded.\n- parsing Primate AI variants... 
70,121,953 variants parsed.\n \n# variants with unknown gene ID: 27,253 / 70,121,953\n# genes with unknown gene ID: 109 / 19,614\n \n# variants not in UGA: 2,036 / 70,121,953\n# genes not in UGA: 6 / 19,614\n")),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"Known Issues")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},"The Primate AI data set provides raw scores, but the scores are biased according to gene context. I.e. a 0.4 means something different in ",(0,r.kt)("inlineCode",{parentName:"p"},"TP53")," than it does in ",(0,r.kt)("inlineCode",{parentName:"p"},"KRAS"),"."),(0,r.kt)("p",{parentName:"div"},"As a result, the Primate AI team provided guidance on aggregating these scores and presenting them as percentiles with respect to the associated gene. 
According to their research, the 25",(0,r.kt)("sup",null,"th")," percentile is a good proxy for benign variants and the 75",(0,r.kt)("sup",null,"th")," percentile is a good proxy for pathogenic variants."))),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"https://basespace.illumina.com/s/cPgCSmecvhb4"},"https://basespace.illumina.com/s/cPgCSmecvhb4")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(i.default,{mdxType:"JSON"}))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/eef24e02.a3183fef.js b/assets/js/eef24e02.a3183fef.js new file mode 100644 index 00000000..840bdfc4 --- /dev/null +++ b/assets/js/eef24e02.a3183fef.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4974],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>h});var i=t(7294);function a(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);n&&(i=i.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,i)}return t}function l(e){for(var n=1;n=0||(a[t]=e[t]);return a}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(a[t]=e[t])}return a}var s=i.createContext({}),c=function(e){var n=i.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):l(l({},n),e)),t},p=function(e){var n=c(e.components);return i.createElement(s.Provider,{value:n},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return i.createElement(i.Fragment,{},n)}},d=i.forwardRef((function(e,n){var 
t=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,p=r(e,["components","mdxType","originalType","parentName"]),u=c(t),d=a,h=u["".concat(s,".").concat(d)]||u[d]||m[d]||o;return t?i.createElement(h,l(l({ref:n},p),{},{components:t})):i.createElement(h,l({ref:n},p))}));function h(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var o=t.length,l=new Array(o);l[0]=d;var r={};for(var s in n)hasOwnProperty.call(n,s)&&(r[s]=n[s]);r.originalType=e,r[u]="string"==typeof e?e:a,l[1]=r;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>r,toc:()=>s});var i=t(7462),a=(t(7294),t(3905));const o={title:"Jasix"},l=void 0,r={unversionedId:"utilities/jasix",id:"utilities/jasix",title:"Jasix",description:"Overview",source:"@site/docs/utilities/jasix.mdx",sourceDirName:"utilities",slug:"/utilities/jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/utilities/jasix",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/utilities/jasix.mdx",tags:[],version:"current",frontMatter:{title:"Jasix"},sidebar:"docs",previous:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids"},next:{title:"SAUtils",permalink:"/IlluminaConnectedAnnotationsDocumentation/utilities/sautils"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Creating the Jasix index",id:"creating-the-jasix-index",children:[{value:"Example",id:"example",children:[],level:3}],level:2},{value:"Querying the index",id:"querying-the-index",children:[],level:2},{value:"Extracting a section",id:"extracting-a-section",children:[],level:2}],c={toc:s},p="wrapper";function u(e){let{components:n,...t}=e;return(0,a.kt)(p,(0,i.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,a.kt)("h2",{id:"overview"},"Overview"),(0,a.kt)("p",null,"The Jasix index is aimed at providing TABIX like indexing capabilities for the Illumina Connected Annotations JSON 
output."),(0,a.kt)("h2",{id:"creating-the-jasix-index"},"Creating the Jasix index"),(0,a.kt)("p",null,"The Jasix index (that comes in a .jsi) file is generated on-the-fly with Illumina Connected Annotations output. It can also be generated independently by running the Jasix command line utility on the JSON output file. Please note that the Jasix utility can only consume JSON files that follow the Illumina Connected Annotations JSON output format. The following code blocks demonstrate the help menu and index generating functionalities of Jasix."),(0,a.kt)("h3",{id:"example"},"Example"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -h\nUSAGE: dotnet Jasix.dll -i in.json.gz [options]\nIndexes a Illumina Connected Annotations annotated JSON file\n\nOPTIONS:\n --header, -t print also the header lines\n --only-header, -H print only the header lines\n --chromosomes, -l list chromosome names\n --index, -c create index\n --in, -i input\n --out, -o compressed output file name (default:console)\n --query, -q query range\n --section, -s complete section (positions or genes) to output\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll --index -i input.json.gz\n---------------------------------------------------------------------------\nJasix (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nRef Sequence chrM indexed in 00:00:00.2\nRef Sequence chr1 indexed in 00:00:05.8\nRef Sequence chr2 indexed in 00:00:06.0\n.\n.\n.\nPeak memory usage: 28.5 MB\nTime: 00:01:14.8\n")),(0,a.kt)("h2",{id:"querying-the-index"},"Querying the index"),(0,a.kt)("p",null,"The Jasix query format is chr:start-end. If not provided, it assumes end=start. 
If only chr is provided, all entries for that chromosome will be provided."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz chrM:5000-7000\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n }\n ]\n}\n\n')),(0,a.kt)("p",null,'The default output stream is Console. However, if an output filename is provided, Jasix outputs the results to that file in a bgzip compressed format. The output is always a valid JSON entry. If requested (via -t option) the header of the indexed file will be provided. 
Multiple queries can be submitted in the same command and the output will contain them within the same "positions" block in order of the submitted queries (Warning: if the queries are out of order, or overlapping, the output will be out or order and intersecting).'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -q chrM:5000-7000 -q chrM:8500-9500 -t\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"Illumina Annotation Engine 1.6.2.0",\n "creationTime":"2017-08-30 11:42:57",\n "genomeAssembly":"GRCh37",\n "schemaVersion":6,\n "dataVersion":"84.24.39",\n "dataSources":[\n {\n "name":"VEP",\n "version":"84",\n "description":"Ensembl",\n "releaseDate":"2017-01-16"\n }\n ],\n "samples":[\n "Mother"\n ]\n },\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":8702,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":0.9987,\n 
"totalDepth":1534,\n "genotypeQuality":1,\n "alleleDepths":[\n 2,\n 1532\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":8702,\n "chromosome":"chrM",\n "end":8702,\n "variantType":"SNV",\n "vid":"MT:8702:A"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":9378,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1018,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1018\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":9378,\n "chromosome":"chrM",\n "end":9378,\n "variantType":"SNV",\n "vid":"MT:9378:A"\n }\n ]\n }\n ]\n}\n')),(0,a.kt)("h2",{id:"extracting-a-section"},"Extracting a section"),(0,a.kt)("p",null,"The Illumina Connected Annotations JSON file has three sections: header, positions and genes. Header can be printed using the -H option. If you are interested in only the positions or genes section, you can use the -s or --section option."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -s genes\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'[\n{\n "name": "ABCB10",\n "omim": [\n {\n "mimNumber": 605454,\n "geneName": "ATP-binding cassette, subfamily B, member 10"\n }\n ]\n},\n{\n "name": "ABCD3",\n "omim": [\n {\n "mimNumber": 170995,\n "geneName": "ATP-binding cassette, subfamily D, member 3 (peroxisomal membrane protein 1, 70kD)",\n "description": "The ABCD3 gene encodes a peroxisomal membrane transporter involved in the transport of branched-chain fatty acids and C27 bile acids into the peroxisome; the latter function is a crucial step in bile acid biosynthesis (summary by Ferdinandusse et al., 2015).",\n "phenotypes": [\n {\n "mimNumber": 616278,\n "phenotype": "?Bile acid synthesis defect, congenital, 5",\n "mapping": "molecular basis of 
the disorder is known",\n "inheritances": [\n "Autosomal recessive"\n ],\n "comments": [\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n ]\n}\n]\n')))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/eef24e02.cc004cff.js b/assets/js/eef24e02.cc004cff.js deleted file mode 100644 index a5770cde..00000000 --- a/assets/js/eef24e02.cc004cff.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4974],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>h});var i=t(67294);function a(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);n&&(i=i.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,i)}return t}function l(e){for(var n=1;n=0||(a[t]=e[t]);return a}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(a[t]=e[t])}return a}var s=i.createContext({}),c=function(e){var n=i.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):l(l({},n),e)),t},p=function(e){var n=c(e.components);return i.createElement(s.Provider,{value:n},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return i.createElement(i.Fragment,{},n)}},d=i.forwardRef((function(e,n){var t=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,p=r(e,["components","mdxType","originalType","parentName"]),u=c(t),d=a,h=u["".concat(s,".").concat(d)]||u[d]||m[d]||o;return t?i.createElement(h,l(l({ref:n},p),{},{components:t})):i.createElement(h,l({ref:n},p))}));function h(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var o=t.length,l=new Array(o);l[0]=d;var r={};for(var s in n)hasOwnProperty.call(n,s)&&(r[s]=n[s]);r.originalType=e,r[u]="string"==typeof 
e?e:a,l[1]=r;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>l,default:()=>u,frontMatter:()=>o,metadata:()=>r,toc:()=>s});var i=t(87462),a=(t(67294),t(3905));const o={title:"Jasix"},l=void 0,r={unversionedId:"utilities/jasix",id:"utilities/jasix",title:"Jasix",description:"Overview",source:"@site/docs/utilities/jasix.mdx",sourceDirName:"utilities",slug:"/utilities/jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/utilities/jasix",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/utilities/jasix.mdx",tags:[],version:"current",frontMatter:{title:"Jasix"},sidebar:"docs",previous:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids"},next:{title:"SAUtils",permalink:"/IlluminaConnectedAnnotationsDocumentation/utilities/sautils"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Creating the Jasix index",id:"creating-the-jasix-index",children:[{value:"Example",id:"example",children:[],level:3}],level:2},{value:"Querying the index",id:"querying-the-index",children:[],level:2},{value:"Extracting a section",id:"extracting-a-section",children:[],level:2}],c={toc:s},p="wrapper";function u(e){let{components:n,...t}=e;return(0,a.kt)(p,(0,i.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,a.kt)("h2",{id:"overview"},"Overview"),(0,a.kt)("p",null,"The Jasix index is aimed at providing TABIX like indexing capabilities for the Illumina Connected Annotations JSON output."),(0,a.kt)("h2",{id:"creating-the-jasix-index"},"Creating the Jasix index"),(0,a.kt)("p",null,"The Jasix index (that comes in a .jsi) file is generated on-the-fly with Illumina Connected Annotations output. It can also be generated independently by running the Jasix command line utility on the JSON output file. Please note that the Jasix utility can only consume JSON files that follow the Illumina Connected Annotations JSON output format. 
The following code blocks demonstrate the help menu and index generating functionalities of Jasix."),(0,a.kt)("h3",{id:"example"},"Example"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -h\nUSAGE: dotnet Jasix.dll -i in.json.gz [options]\nIndexes a Illumina Connected Annotations annotated JSON file\n\nOPTIONS:\n --header, -t print also the header lines\n --only-header, -H print only the header lines\n --chromosomes, -l list chromosome names\n --index, -c create index\n --in, -i input\n --out, -o compressed output file name (default:console)\n --query, -q query range\n --section, -s complete section (positions or genes) to output\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll --index -i input.json.gz\n---------------------------------------------------------------------------\nJasix (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nRef Sequence chrM indexed in 00:00:00.2\nRef Sequence chr1 indexed in 00:00:05.8\nRef Sequence chr2 indexed in 00:00:06.0\n.\n.\n.\nPeak memory usage: 28.5 MB\nTime: 00:01:14.8\n")),(0,a.kt)("h2",{id:"querying-the-index"},"Querying the index"),(0,a.kt)("p",null,"The Jasix query format is chr:start-end. If not provided, it assumes end=start. 
If only chr is provided, all entries for that chromosome will be provided."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz chrM:5000-7000\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n }\n ]\n}\n\n')),(0,a.kt)("p",null,'The default output stream is Console. However, if an output filename is provided, Jasix outputs the results to that file in a bgzip compressed format. The output is always a valid JSON entry. If requested (via -t option) the header of the indexed file will be provided. 
Multiple queries can be submitted in the same command and the output will contain them within the same "positions" block in order of the submitted queries (Warning: if the queries are out of order, or overlapping, the output will be out or order and intersecting).'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -q chrM:5000-7000 -q chrM:8500-9500 -t\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"Illumina Annotation Engine 1.6.2.0",\n "creationTime":"2017-08-30 11:42:57",\n "genomeAssembly":"GRCh37",\n "schemaVersion":6,\n "dataVersion":"84.24.39",\n "dataSources":[\n {\n "name":"VEP",\n "version":"84",\n "description":"Ensembl",\n "releaseDate":"2017-01-16"\n }\n ],\n "samples":[\n "Mother"\n ]\n },\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":8702,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":0.9987,\n 
"totalDepth":1534,\n "genotypeQuality":1,\n "alleleDepths":[\n 2,\n 1532\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":8702,\n "chromosome":"chrM",\n "end":8702,\n "variantType":"SNV",\n "vid":"MT:8702:A"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":9378,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1018,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1018\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":9378,\n "chromosome":"chrM",\n "end":9378,\n "variantType":"SNV",\n "vid":"MT:9378:A"\n }\n ]\n }\n ]\n}\n')),(0,a.kt)("h2",{id:"extracting-a-section"},"Extracting a section"),(0,a.kt)("p",null,"The Illumina Connected Annotations JSON file has three sections: header, positions and genes. Header can be printed using the -H option. If you are interested in only the positions or genes section, you can use the -s or --section option."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -s genes\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'[\n{\n "name": "ABCB10",\n "omim": [\n {\n "mimNumber": 605454,\n "geneName": "ATP-binding cassette, subfamily B, member 10"\n }\n ]\n},\n{\n "name": "ABCD3",\n "omim": [\n {\n "mimNumber": 170995,\n "geneName": "ATP-binding cassette, subfamily D, member 3 (peroxisomal membrane protein 1, 70kD)",\n "description": "The ABCD3 gene encodes a peroxisomal membrane transporter involved in the transport of branched-chain fatty acids and C27 bile acids into the peroxisome; the latter function is a crucial step in bile acid biosynthesis (summary by Ferdinandusse et al., 2015).",\n "phenotypes": [\n {\n "mimNumber": 616278,\n "phenotype": "?Bile acid synthesis defect, congenital, 5",\n "mapping": "molecular basis of 
the disorder is known",\n "inheritances": [\n "Autosomal recessive"\n ],\n "comments": [\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n ]\n}\n]\n')))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/ef4059aa.935f8198.js b/assets/js/ef4059aa.935f8198.js deleted file mode 100644 index 12799cca..00000000 --- a/assets/js/ef4059aa.935f8198.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3790],{3905:(t,e,n)=>{n.d(e,{Zo:()=>d,kt:()=>g});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var s=a.createContext({}),c=function(t){var e=a.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},d=function(t){var e=c(t.components);return a.createElement(s.Provider,{value:e},t.children)},u="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},p=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,s=t.parentName,d=i(t,["components","mdxType","originalType","parentName"]),u=c(n),p=r,g=u["".concat(s,".").concat(p)]||u[p]||m[p]||l;return n?a.createElement(g,o(o({ref:e},d),{},{components:n})):a.createElement(g,o({ref:e},d))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=p;var i={};for(var s in e)hasOwnProperty.call(e,s)&&(i[s]=e[s]);i.originalType=t,i[u]="string"==typeof 
t?t:r,o[1]=i;for(var c=2;c{n.d(e,{Z:()=>r});var a=n(67294);function r(t){let{className:e,name:n,children:r,githubUrl:l,twitterUrl:o}=t;return a.createElement("div",{className:e},a.createElement("div",{className:"card card--full-height"},a.createElement("div",{className:"card__header"},a.createElement("div",{className:"avatar avatar--vertical"},a.createElement("img",{className:"avatar__photo avatar__photo--xl",src:l+".png"}),a.createElement("div",{className:"avatar__intro"},a.createElement("h3",{className:"avatar__name"},n)))),a.createElement("div",{className:"card__body"},r),a.createElement("div",{className:"card__footer"},a.createElement("div",{className:"button-group button-group--block"},l&&a.createElement("a",{className:"button button--secondary",href:l},"GitHub"),o&&a.createElement("a",{className:"button button--secondary",href:o},"Twitter")))))}},668:(t,e,n)=>{n.r(e),n.d(e,{TeamProfileCardCol:()=>d,contentTitle:()=>i,default:()=>p,frontMatter:()=>o,metadata:()=>s,toc:()=>c});var a=n(87462),r=(n(67294),n(3905)),l=n(63427);const o={id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},i=void 0,s={unversionedId:"introduction/introduction",id:"introduction/introduction",title:"Introduction",description:"Clinical-grade variant annotation",source:"@site/docs/introduction/introduction.mdx",sourceDirName:"introduction",slug:"/",permalink:"/IlluminaConnectedAnnotationsDocumentation/",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/introduction/introduction.mdx",tags:[],version:"current",frontMatter:{id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},sidebar:"docs",next:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies"}},c=[{value:"What does Illumina Connected Annotations 
annotate?",id:"what-does-illumina-connected-annotations-annotate",children:[],level:2},{value:"Download",id:"download",children:[],level:2}];function d(t){return(0,r.kt)(l.Z,(0,a.Z)({},t,{className:"col col--6 margin-bottom--lg",mdxType:"TeamProfileCard"}))}const u={toc:c,TeamProfileCardCol:d},m="wrapper";function p(t){let{components:e,...l}=t;return(0,r.kt)(m,(0,a.Z)({},u,l,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(87951).Z})),(0,r.kt)("p",null,"Illumina Connected Annotations provides translational research-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs (including CNVs). It can be run as a stand-alone package, or integrated into larger software tools that require variant annotation."),(0,r.kt)("p",null,"The input to Illumina Connected Annotations are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Illumina Connected Annotations handles multiple alternate alleles and multiple samples with ease."),(0,r.kt)("p",null,"The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software. 
Illumina Connected Annotations uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily."),(0,r.kt)("h2",{id:"what-does-illumina-connected-annotations-annotate"},"What does Illumina Connected Annotations annotate?"),(0,r.kt)("p",null,"We use Sequence Ontology consequences to describe how each variant impacts a given transcript:"),(0,r.kt)("p",null,(0,r.kt)("img",{src:n(98812).Z})),(0,r.kt)("p",null,"The transcript and gene models are obtained from ",(0,r.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/"},"RefSeq")," and ",(0,r.kt)("a",{parentName:"p",href:"https://ftp.ensembl.org/pub/"},"Ensembl"),"."),(0,r.kt)("p",null,"In addition, it uses external data sources to provide additional context for each variant.\nIllumina Connected Annotations provides annotations from the following sources divided into 2 tiers: Professional and basic.\nThe basic tier can be accessed free of charge. The professional tier requires a license. 
For access, please contact ",(0,r.kt)("a",{parentName:"p",href:"mailto:annotation_support@illumina.com."},"annotation_support@illumina.com.")," "),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Data Source"),(0,r.kt)("th",{parentName:"tr",align:null},"Availability"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"Primate AI-3D"),(0,r.kt)("td",{parentName:"tr",align:null},"Professional")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"Splice AI"),(0,r.kt)("td",{parentName:"tr",align:null},"Professional")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"COSMIC"),(0,r.kt)("td",{parentName:"tr",align:null},"Professional")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"OMIM"),(0,r.kt)("td",{parentName:"tr",align:null},"Professional")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"ClinVar"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"1000 Genomes Project"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DANN"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"dbSNP"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"DECIPHER"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"GERP"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"GME 
Variome"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"gnomAD"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"MITOMAP"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"REVEL"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"TOPMed"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"Cancer Hotspots"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"FusionCatcher"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"ClinGen"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"MultiZ 100 way"),(0,r.kt)("td",{parentName:"tr",align:null},"Basic")))),(0,r.kt)("h2",{id:"download"},"Download"),(0,r.kt)("p",null,"Please visit ",(0,r.kt)("a",{parentName:"p",href:"https://developer.illumina.com/IlluminaConnectedAnnotations"},"Illumina Connected Annotations"),"."))}p.isMDXComponent=!0},87951:(t,e,n)=>{n.d(e,{Z:()=>a});const a=n.p+"assets/images/ICAnnotations-966475fab8adae0519d1667d592ad4b2.png"},98812:(t,e,n)=>{n.d(e,{Z:()=>a});const a=n.p+"assets/images/TranscriptConsequences-60ca1c43a36dacf896fecdabf09ce02c.svg"}}]); \ No newline at end of file diff --git a/assets/js/ef4059aa.e422b82f.js b/assets/js/ef4059aa.e422b82f.js new file mode 100644 index 00000000..28848abf --- /dev/null +++ b/assets/js/ef4059aa.e422b82f.js @@ -0,0 +1 @@ +"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[3790],{3905:(t,e,n)=>{n.d(e,{Zo:()=>d,kt:()=>g});var a=n(7294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function o(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var s=a.createContext({}),c=function(t){var e=a.useContext(s),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},d=function(t){var e=c(t.components);return a.createElement(s.Provider,{value:e},t.children)},u="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},p=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,s=t.parentName,d=i(t,["components","mdxType","originalType","parentName"]),u=c(n),p=r,g=u["".concat(s,".").concat(p)]||u[p]||m[p]||l;return n?a.createElement(g,o(o({ref:e},d),{},{components:n})):a.createElement(g,o({ref:e},d))}));function g(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,o=new Array(l);o[0]=p;var i={};for(var s in e)hasOwnProperty.call(e,s)&&(i[s]=e[s]);i.originalType=t,i[u]="string"==typeof t?t:r,o[1]=i;for(var c=2;c{n.r(e),n.d(e,{TeamProfileCardCol:()=>u,contentTitle:()=>s,default:()=>g,frontMatter:()=>i,metadata:()=>c,toc:()=>d});var a=n(7462),r=n(7294),l=n(3905);function o(t){let{className:e,name:n,children:a,githubUrl:l,twitterUrl:o}=t;return r.createElement("div",{className:e},r.createElement("div",{className:"card 
card--full-height"},r.createElement("div",{className:"card__header"},r.createElement("div",{className:"avatar avatar--vertical"},r.createElement("img",{className:"avatar__photo avatar__photo--xl",src:l+".png"}),r.createElement("div",{className:"avatar__intro"},r.createElement("h3",{className:"avatar__name"},n)))),r.createElement("div",{className:"card__body"},a),r.createElement("div",{className:"card__footer"},r.createElement("div",{className:"button-group button-group--block"},l&&r.createElement("a",{className:"button button--secondary",href:l},"GitHub"),o&&r.createElement("a",{className:"button button--secondary",href:o},"Twitter")))))}const i={id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},s=void 0,c={unversionedId:"introduction/introduction",id:"introduction/introduction",title:"Introduction",description:"Clinical-grade variant annotation",source:"@site/docs/introduction/introduction.mdx",sourceDirName:"introduction",slug:"/",permalink:"/IlluminaConnectedAnnotationsDocumentation/",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/introduction/introduction.mdx",tags:[],version:"current",frontMatter:{id:"introduction",title:"Introduction",description:"Clinical-grade variant annotation",hide_title:!0,slug:"/"},sidebar:"docs",next:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies"}},d=[{value:"What does Illumina Connected Annotations annotate?",id:"what-does-illumina-connected-annotations-annotate",children:[],level:2},{value:"Download",id:"download",children:[],level:2}];function u(t){return(0,l.kt)(o,(0,a.Z)({},t,{className:"col col--6 margin-bottom--lg",mdxType:"TeamProfileCard"}))}const m={toc:d,TeamProfileCardCol:u},p="wrapper";function 
g(t){let{components:e,...r}=t;return(0,l.kt)(p,(0,a.Z)({},m,r,{components:e,mdxType:"MDXLayout"}),(0,l.kt)("p",null,(0,l.kt)("img",{src:n(7588).Z})),(0,l.kt)("p",null,"Illumina Connected Annotations provides translational research-grade annotation of genomic variants (SNVs, MNVs, insertions, deletions, indels, STRs, gene fusions, and SVs (including CNVs). It can be run as a stand-alone package, or integrated into larger software tools that require variant annotation."),(0,l.kt)("p",null,"The input to Illumina Connected Annotations are VCFs and the output is a structured JSON representation of all annotation and sample information (as extracted from the VCF). Illumina Connected Annotations handles multiple alternate alleles and multiple samples with ease."),(0,l.kt)("p",null,"The software is being developed under a rigorous SDLC and testing process to ensure accuracy of the results and enable embedding in other software. Illumina Connected Annotations uses a continuous integration pipeline where millions of variant annotations are monitored against baseline values daily."),(0,l.kt)("h2",{id:"what-does-illumina-connected-annotations-annotate"},"What does Illumina Connected Annotations annotate?"),(0,l.kt)("p",null,"We use Sequence Ontology consequences to describe how each variant impacts a given transcript:"),(0,l.kt)("p",null,(0,l.kt)("img",{src:n(1258).Z})),(0,l.kt)("p",null,"The transcript and gene models are obtained from ",(0,l.kt)("a",{parentName:"p",href:"https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/"},"RefSeq")," and ",(0,l.kt)("a",{parentName:"p",href:"https://ftp.ensembl.org/pub/"},"Ensembl"),"."),(0,l.kt)("p",null,"In addition, it uses external data sources to provide additional context for each variant.\nIllumina Connected Annotations provides annotations from the following sources divided into 2 tiers: Professional and basic.\nThe basic tier can be accessed free of charge. The professional tier requires a license. 
For access, please contact ",(0,l.kt)("a",{parentName:"p",href:"mailto:annotation_support@illumina.com."},"annotation_support@illumina.com.")," "),(0,l.kt)("table",null,(0,l.kt)("thead",{parentName:"table"},(0,l.kt)("tr",{parentName:"thead"},(0,l.kt)("th",{parentName:"tr",align:null},"Data Source"),(0,l.kt)("th",{parentName:"tr",align:null},"Availability"))),(0,l.kt)("tbody",{parentName:"table"},(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"Primate AI-3D"),(0,l.kt)("td",{parentName:"tr",align:null},"Professional")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"Splice AI"),(0,l.kt)("td",{parentName:"tr",align:null},"Professional")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"COSMIC"),(0,l.kt)("td",{parentName:"tr",align:null},"Professional")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"OMIM"),(0,l.kt)("td",{parentName:"tr",align:null},"Professional")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"ClinVar"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"1000 Genomes Project"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"DANN"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"dbSNP"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"DECIPHER"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"GERP"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"GME 
Variome"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"gnomAD"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"MITOMAP"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"REVEL"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"TOPMed"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"Cancer Hotspots"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"FusionCatcher"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"ClinGen"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")),(0,l.kt)("tr",{parentName:"tbody"},(0,l.kt)("td",{parentName:"tr",align:null},"MultiZ 100 way"),(0,l.kt)("td",{parentName:"tr",align:null},"Basic")))),(0,l.kt)("h2",{id:"download"},"Download"),(0,l.kt)("p",null,"Please visit ",(0,l.kt)("a",{parentName:"p",href:"https://developer.illumina.com/illumina-connected-annotations"},"Illumina Connected Annotations"),"."))}g.isMDXComponent=!0},7588:(t,e,n)=>{n.d(e,{Z:()=>a});const a=n.p+"assets/images/ICAnnotations-966475fab8adae0519d1667d592ad4b2.png"},1258:(t,e,n)=>{n.d(e,{Z:()=>a});const a=n.p+"assets/images/TranscriptConsequences-60ca1c43a36dacf896fecdabf09ce02c.svg"}}]); \ No newline at end of file diff --git a/assets/js/efeb3f9c.6440e64c.js b/assets/js/efeb3f9c.6440e64c.js deleted file mode 100644 index d7c3d616..00000000 --- a/assets/js/efeb3f9c.6440e64c.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[5333],{3905:(n,e,t)=>{t.d(e,{Zo:()=>d,kt:()=>u});var a=t(67294);function i(n,e,t){return e in n?Object.defineProperty(n,e,{value:t,enumerable:!0,configurable:!0,writable:!0}):n[e]=t,n}function o(n,e){var t=Object.keys(n);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(n);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(n,e).enumerable}))),t.push.apply(t,a)}return t}function r(n){for(var e=1;e=0||(i[t]=n[t]);return i}(n,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(n);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(n,t)&&(i[t]=n[t])}return i}var c=a.createContext({}),l=function(n){var e=a.useContext(c),t=e;return n&&(t="function"==typeof n?n(e):r(r({},e),n)),t},d=function(n){var e=l(n.components);return a.createElement(c.Provider,{value:e},n.children)},p="mdxType",g={inlineCode:"code",wrapper:function(n){var e=n.children;return a.createElement(a.Fragment,{},e)}},m=a.forwardRef((function(n,e){var t=n.components,i=n.mdxType,o=n.originalType,c=n.parentName,d=s(n,["components","mdxType","originalType","parentName"]),p=l(t),m=i,u=p["".concat(c,".").concat(m)]||p[m]||g[m]||o;return t?a.createElement(u,r(r({ref:e},d),{},{components:t})):a.createElement(u,r({ref:e},d))}));function u(n,e){var t=arguments,i=e&&e.mdxType;if("string"==typeof n||i){var o=t.length,r=new Array(o);r[0]=m;var s={};for(var c in e)hasOwnProperty.call(e,c)&&(s[c]=e[c]);s.originalType=n,s[p]="string"==typeof n?n:i,r[1]=s;for(var l=2;l{t.r(e),t.d(e,{contentTitle:()=>r,default:()=>p,frontMatter:()=>o,metadata:()=>s,toc:()=>c});var a=t(87462),i=(t(67294),t(3905));const o={title:"Parsing Nirvana JSON"},r=void 0,s={unversionedId:"introduction/parsing-json",id:"version-3.16/introduction/parsing-json",title:"Parsing Nirvana JSON",description:"Why 
JSON?",source:"@site/versioned_docs/version-3.16/introduction/parsing-json.md",sourceDirName:"introduction",slug:"/introduction/parsing-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/parsing-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/introduction/parsing-json.md",tags:[],version:"3.16",frontMatter:{title:"Parsing Nirvana JSON"},sidebar:"version-3.16/docs",previous:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/getting-started"},next:{title:"Annotating COVID-19",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/introduction/covid19"}},c=[{value:"Why JSON?",id:"why-json",children:[{value:"What do other annotators use?",id:"what-do-other-annotators-use",children:[],level:3},{value:"What do we gain by using JSON?",id:"what-do-we-gain-by-using-json",children:[],level:3}],level:2},{value:"Parsing JSON",id:"parsing-json",children:[{value:"Organization",id:"organization",children:[],level:3},{value:"JASIX",id:"jasix",children:[],level:3}],level:2}],l={toc:c},d="wrapper";function p(n){let{components:e,...o}=n;return(0,i.kt)(d,(0,a.Z)({},l,o,{components:e,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"why-json"},"Why JSON?"),(0,i.kt)("p",null,"VCF is a fantastic file format that was developed during the methods development activities within the 1000 Genomes Project. Prior to that, variant callers were outputting information into a variety of tab-delimited formats. Sometimes based on existing standards (like GFF), while most were proprietary. The primary intent of VCF files was to provide a human-readable, standardized representation of genetic variants. Similar to SAM/BAM files, VCF files used BCF files as their binary counterpart."),(0,i.kt)("p",null,"In the very beginning, Nirvana offered VCF output for annotation. 
While many variant annotators offer an option to output VCF files, one could argue if they are still human-readable. Here's an example from a VCF file produced by VEP v102:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre"},"chr3 107840527 . A ATTTTTTTTT,AT,ATTTTTTTT 153.51 PASS AN=6;MQ=244.10;\nSOR=1.739;QD=2.24;DP=57;AF=0.500,0.167,0.333;FS=0.000;AC=3,1,2;CSQ=TTTTTTTTT|\nintron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|\nTranscript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-132_622-124dup|||||||\nrs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||\n|||||||||0.792|-0.109757,T|intron_variant&non_coding_transcript_variant|MODIFIER|\nLINC00635|ENSG00000241469|Transcript|ENST00000608506.6|lncRNA||4/4|\nENST00000608506.6:n.622-124dup|||||||rs35564779||-1||HGNC|HGNC:27184|||5|||||||||\nEnsembl||||||||||||||||||||||||||||||||||||||||||||0.932|-0.075622,TTTTTTTT|\nintron_variant&non_coding_transcript_variant|MODIFIER|LINC00635|ENSG00000241469|\nTranscript|ENST00000608506.6|lncRNA||4/4|ENST00000608506.6:n.622-131_622-124dup|||||||\nrs35564779||-1||HGNC|HGNC:27184|||5|||||||||Ensembl|||||||||||||||||||||||||||||||||||\n|||||||||0.808|-0.105490,TTTTTTTTT|intron_variant&non_coding_transcript_variant|\nMODIFIER|LINC00636|ENSG00000240423|Transcript|ENST00000649048.1|lncRNA||2/3|\nENST00000649048.1:n.179+5223_179+5231dup|||||||rs35564779||1||HGNC|HGNC:27702|||||||||\n|||Ensembl||||||||||||||||||||||||||||||||||||||||||||0.792|-0.109757, (etc.)\n")),(0,i.kt)("p",null,"Originally Nirvana used the same VCF notation as VEP uses above. The problem is that you end up with a large amount of text that is difficult to parse out by eye and requires the use of several delimiters to divide the information into useful segments. When we originally annotated this variant using VEP, ",(0,i.kt)("strong",{parentName:"p"},"this single variant used 488,909 bytes")," (almost \xbd MB). 
Surprisingly, we found that this broke some downstream tools that had preconceived notions of how long a single line could be in a VCF file."),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Whitespace is not allowed in the VCF INFO field. This means that if you wanted to express a gene description from OMIM: ",(0,i.kt)("strong",{parentName:"p"},'"HRAS PROTOONCOGENE, GTPase; HRAS"'),", you would need to replace the spaces with something else like an underline. You would also need to hope that the VCF parser correctly handles embedded commas and semicolons in the description."))),(0,i.kt)("h3",{id:"what-do-other-annotators-use"},"What do other annotators use?"),(0,i.kt)("p",null,"Unfortunately, file format standardization has not made it all the way to variant annotation yet. The ",(0,i.kt)("a",{parentName:"p",href:"https://ga4gh-gks.github.io/variant_annotation.html"},"GA4GH Annotation group")," had many discussions on the topic several years ago. While a set of JSON schemas were created in that effort, there wasn't enough momentum to make this a new standard."),(0,i.kt)("p",null,"While there is some overlap in general file formats (JSON vs VCF vs TSV), none of those are compatible with each other. I.e. 
the VCF representation in VEP and snpEff is different just like the JSON schemas used by VEP, Nirvana, and GA4GH are different."),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Source"),(0,i.kt)("th",{parentName:"tr",align:null},"Formats"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"VEP"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"),", TSV, VCF")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"snpEff"),(0,i.kt)("td",{parentName:"tr",align:null},"VCF")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"Annovar"),(0,i.kt)("td",{parentName:"tr",align:null},"TSV")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"Nirvana"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"))),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"GA4GH"),(0,i.kt)("td",{parentName:"tr",align:null},(0,i.kt)("strong",{parentName:"td"},"JSON"))))),(0,i.kt)("p",null,"We are interested in working together with others in the annotation space to develop a common annotation file format. Our belief is that this would accelerate methods development and benchmarking activities within annotation much in the same way the creation of SAM/BAM & VCF/BCF accelerated secondary analysis development."),(0,i.kt)("h3",{id:"what-do-we-gain-by-using-json"},"What do we gain by using JSON?"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"JSON files are better at showing hierarchical and other relational data. For example when we output ClinVar data, we often want to output several overlapping RCV entries (variants coupled with a disease phenotype). In each, we would want to output a list of phenotypes, clinical significance, etc. 
That is difficult to accomplish in a human-readable way using VCF files (without resorting to growing lexicon of delimiters)."),(0,i.kt)("li",{parentName:"ul"},"JSON files use JavaScript data types, while VCF INFO fields don't directly have data types. Instead, external metadata located in the VCF header is required to indicated the preferred data type."),(0,i.kt)("li",{parentName:"ul"},"JSON files are more verbose. Often this is seen as a negative, but compression largely compensates for this. Given the following excerpt from the VCF example above ",(0,i.kt)("inlineCode",{parentName:"li"},"HGNC:27184|||5|||||||||Ensembl")," it's not immediately obvious what the ",(0,i.kt)("inlineCode",{parentName:"li"},"5")," refers to (without checking the VCF header for details). With JSON files, you would always see a key name associated with a value."),(0,i.kt)("li",{parentName:"ul"},"JSON files can be natively imported into different search and analytics solutions like Elasticsearch and Snowflake."),(0,i.kt)("li",{parentName:"ul"},"JSON strings do not have any limitations on the use of whitespace.")),(0,i.kt)("h2",{id:"parsing-json"},"Parsing JSON"),(0,i.kt)("p",null,"Our JSON files are organized similarly to original VCF variants:"),(0,i.kt)("p",null,(0,i.kt)("img",{src:t(48978).Z})),(0,i.kt)("p",null,"Nirvana JSON files can get very large and sometimes we receive feedback that a bioinformatician tried to read the JSON file into Python or R resulting in a program that ran out of available RAM. 
This happens because those parsers try to load everything into memory all at once."),(0,i.kt)("p",null,"To get around those issues, we play some clever tricks with newlines that enables our users to parse our JSON files quickly and efficiently."),(0,i.kt)("h3",{id:"organization"},"Organization"),(0,i.kt)("p",null,"Our JSON file is arranged as follows:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the header section is located on the first line"),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a position (same as a row in a VCF file)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the genes section ",(0,i.kt)("inlineCode",{parentName:"li"},'],"genes":[')))),(0,i.kt)("li",{parentName:"ul"},"each line after that corresponds to a gene",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"until you reach the end ",(0,i.kt)("inlineCode",{parentName:"li"},"]}"))))),(0,i.kt)("p",null,"Knowing this, you can load each position line as an independent JSON object and extract the information you need. 
"),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Jupyter Notebook")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"To demonstrate this, we have put together a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-python.ipynb"},"Jupyter notebook demonstrating how to do this in Python")," and a ",(0,i.kt)("a",{parentName:"p",href:"https://github.com/Illumina/NirvanaDocumentation/blob/master/static/files/parse-nirvana-json-r.ipynb"},"R version")," as well."))),(0,i.kt)("h3",{id:"jasix"},"JASIX"),(0,i.kt)("p",null,"One of the tools that we really like in the VCF ecosystem is ",(0,i.kt)("a",{parentName:"p",href:"https://dx.doi.org/10.1093%2Fbioinformatics%2Fbtq671"},"tabix"),". Unfortunately, tabix only works for tab-delimited file formats. 
As a result, we created a similar tool for Nirvana JSON files called JASIX."),(0,i.kt)("p",null,"Here's an example of how you might use JASIX:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/netcoreapp2.1/Jasix.dll -i dragen.json.gz -q chr1:942450-942455\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the Nirvana JSON path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-q")," argument specifies a genomic range ",(0,i.kt)("em",{parentName:"li"},"(you can use as many of these as you want)"))),(0,i.kt)("p",null,"JASIX also includes additional options for showing the Nirvana header or for extracting different sections (like the genes section)."),(0,i.kt)("p",null,"The output from JASIX is compliant JSON object shown in pretty-printed form:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'{"positions":[\n{\n "chromosome": "chr1",\n "position": 942451,\n "refAllele": "T",\n "altAlleles": [\n "C"\n ],\n "quality": 484.23,\n "filters": [\n "PASS"\n ],\n "cytogeneticBand": "1p36.33",\n "samples": [\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 21,\n "genotypeQuality": 60,\n "alleleDepths": [\n 0,\n 21\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 32,\n "genotypeQuality": 93,\n "alleleDepths": [\n 0,\n 32\n ]\n },\n {\n "genotype": "1/1",\n "variantFrequencies": [\n 1\n ],\n "totalDepth": 36,\n "genotypeQuality": 105,\n "alleleDepths": [\n 0,\n 36\n ]\n }\n ],\n "variants": [\n {\n "vid": "1-942451-T-C",\n "chromosome": "chr1",\n "begin": 942451,\n "end": 942451,\n "refAllele": "T",\n "altAllele": "C",\n "variantType": "SNV",\n "hgvsg": "NC_000001.11:g.942451T>C",\n "phylopScore": -0.1,\n "clinvar": [\n {\n "id": "VCV000836156.1",\n "reviewStatus": "criteria provided, single submitter",\n "significance": 
[\n "uncertain significance"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "lastUpdatedDate": "2020-08-20"\n },\n {\n "id": "RCV001037211.1",\n "variationId": 836156,\n "reviewStatus": "criteria provided, single submitter",\n "alleleOrigins": [\n "germline"\n ],\n "refAllele": "T",\n "altAllele": "T",\n "phenotypes": [\n "not provided"\n ],\n "medGenIds": [\n "CN517202"\n ],\n "significance": [\n "uncertain significance"\n ],\n "lastUpdatedDate": "2020-08-20",\n "pubMedIds": [\n "28492532"\n ]\n }\n ],\n "dbsnp": [\n "rs6672356"\n ],\n "gnomad": {\n "coverage": 25,\n "allAf": 0.999855,\n "allAn": 123742,\n "allAc": 123724,\n "allHc": 61853,\n "afrAf": 0.999416,\n "afrAn": 10278,\n "afrAc": 10272,\n "afrHc": 5133,\n "amrAf": 0.99995,\n "amrAn": 20008,\n "amrAc": 20007,\n "amrHc": 10003,\n "easAf": 1,\n "easAn": 6054,\n "easAc": 6054,\n "easHc": 3027,\n "finAf": 1,\n "finAn": 8696,\n "finAc": 8696,\n "finHc": 4348,\n "nfeAf": 0.999899,\n "nfeAn": 49590,\n "nfeAc": 49585,\n "nfeHc": 24790,\n "asjAf": 1,\n "asjAn": 7208,\n "asjAc": 7208,\n "asjHc": 3604,\n "sasAf": 0.99967,\n "sasAn": 18160,\n "sasAc": 18154,\n "sasHc": 9074,\n "othAf": 1,\n "othAn": 3748,\n "othAc": 3748,\n "othHc": 1874,\n "maleAf": 0.9999,\n "maleAn": 69780,\n "maleAc": 69773,\n "maleHc": 34883,\n "femaleAf": 0.999796,\n "femaleAn": 53962,\n "femaleAc": 53951,\n "femaleHc": 26970,\n "controlsAllAf": 0.999815,\n "controlsAllAn": 48654,\n "controlsAllAc": 48645\n },\n "oneKg": {\n "allAf": 1,\n "afrAf": 1,\n "amrAf": 1,\n "easAf": 1,\n "eurAf": 1,\n "sasAf": 1,\n "allAn": 5008,\n "afrAn": 1322,\n "amrAn": 694,\n "easAn": 1008,\n "eurAn": 1006,\n "sasAn": 978,\n "allAc": 5008,\n "afrAc": 1322,\n "amrAc": 694,\n "easAc": 1008,\n "eurAc": 1006,\n "sasAc": 978\n },\n "primateAI": [\n {\n "hgnc": "SAMD11",\n "scorePercentile": 0.87\n }\n ],\n "revel": {\n "score": 0.145\n },\n "topmed": {\n "allAf": 0.999809,\n "allAn": 125568,\n "allAc": 125544,\n "allHc": 62760\n },\n "transcripts": [\n {\n 
"transcript": "ENST00000420190.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ],\n "proteinId": "ENSP00000411579.2"\n },\n {\n "transcript": "ENST00000342066.7",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000342066.7:c.1027T>C",\n "hgvsp": "ENSP00000342313.3:p.(Trp343Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000342313.3",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000618181.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "732",\n "cdsPos": "652",\n "exons": "7/11",\n "proteinPos": "218",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618181.4:c.652T>C",\n "hgvsp": "ENSP00000480870.1:p.(Trp218Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000480870.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000622503.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "1110",\n "cdsPos": "1030",\n "exons": "10/14",\n "proteinPos": "344",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000622503.4:c.1030T>C",\n "hgvsp": "ENSP00000482138.1:p.(Trp344Arg)",\n "isCanonical": true,\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482138.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000618323.4",\n "source": "Ensembl",\n "bioType": 
"protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "712",\n "cdsPos": "632",\n "exons": "8/12",\n "proteinPos": "211",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618323.4:c.632T>C",\n "hgvsp": "ENSP00000480678.1:p.(Leu211Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000480678.1",\n "siftScore": 0.03,\n "siftPrediction": "deleterious - low confidence"\n },\n {\n "transcript": "ENST00000616016.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "ccT/ccC",\n "aminoAcids": "P",\n "cdnaPos": "944",\n "cdsPos": "864",\n "exons": "9/13",\n "proteinPos": "288",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "ENST00000616016.4:c.864T>C",\n "hgvsp": "ENST00000616016.4:c.864T>C(p.(Pro288=))",\n "proteinId": "ENSP00000478421.1"\n },\n {\n "transcript": "ENST00000618779.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "921",\n "cdsPos": "841",\n "exons": "9/13",\n "proteinPos": "281",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000618779.4:c.841T>C",\n "hgvsp": "ENSP00000484256.1:p.(Trp281Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000484256.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000616125.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "783",\n "cdsPos": "703",\n "exons": "8/12",\n "proteinPos": "235",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000616125.4:c.703T>C",\n "hgvsp": "ENSP00000484643.1:p.(Trp235Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": 
"ENSP00000484643.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000620200.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "cTg/cCg",\n "aminoAcids": "L/P",\n "cdnaPos": "427",\n "cdsPos": "347",\n "exons": "5/9",\n "proteinPos": "116",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000620200.4:c.347T>C",\n "hgvsp": "ENSP00000484820.1:p.(Leu116Pro)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "unknown",\n "proteinId": "ENSP00000484820.1",\n "siftScore": 0.16,\n "siftPrediction": "tolerated - low confidence"\n },\n {\n "transcript": "ENST00000617307.4",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "867",\n "cdsPos": "787",\n "exons": "9/13",\n "proteinPos": "263",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000617307.4:c.787T>C",\n "hgvsp": "ENSP00000482090.1:p.(Trp263Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000482090.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "NM_152486.2",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "codons": "Cgg/Cgg",\n "aminoAcids": "R",\n "cdnaPos": "1107",\n "cdsPos": "1027",\n "exons": "10/14",\n "proteinPos": "343",\n "geneId": "148398",\n "hgnc": "SAMD11",\n "consequence": [\n "synonymous_variant"\n ],\n "hgvsc": "NM_152486.2:c.1027T>C",\n "hgvsp": "NM_152486.2:c.1027T>C(p.(Arg343=))",\n "isCanonical": true,\n "proteinId": "NP_689699.2"\n },\n {\n "transcript": "ENST00000341065.8",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "750",\n "cdsPos": "751",\n "exons": "8/12",\n "proteinPos": "251",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": 
"ENST00000341065.8:c.750T>C",\n "hgvsp": "ENSP00000349216.4:p.(Trp251Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000349216.4",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000455979.1",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "codons": "Tgg/Cgg",\n "aminoAcids": "W/R",\n "cdnaPos": "507",\n "cdsPos": "508",\n "exons": "4/7",\n "proteinPos": "170",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "missense_variant"\n ],\n "hgvsc": "ENST00000455979.1:c.507T>C",\n "hgvsp": "ENSP00000412228.1:p.(Trp170Arg)",\n "polyPhenScore": 0,\n "polyPhenPrediction": "benign",\n "proteinId": "ENSP00000412228.1",\n "siftScore": 1,\n "siftPrediction": "tolerated"\n },\n {\n "transcript": "ENST00000478729.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000474461.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "389",\n "exons": "3/4",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000474461.1:n.389T>C"\n },\n {\n "transcript": "ENST00000466827.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "191",\n "exons": "2/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000466827.1:n.191T>C"\n },\n {\n "transcript": "ENST00000464948.1",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "cdnaPos": "286",\n "exons": "1/2",\n "geneId": "ENSG00000187634",\n "hgnc": "SAMD11",\n "consequence": [\n "non_coding_transcript_exon_variant"\n ],\n "hgvsc": "ENST00000464948.1:n.286T>C"\n },\n {\n "transcript": "NM_015658.3",\n "source": "RefSeq",\n "bioType": "protein_coding",\n "geneId": "26155",\n "hgnc": "NOC2L",\n 
"consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "NP_056473.2"\n },\n {\n "transcript": "ENST00000483767.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000327044.6",\n "source": "Ensembl",\n "bioType": "protein_coding",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ],\n "isCanonical": true,\n "proteinId": "ENSP00000317992.6"\n },\n {\n "transcript": "ENST00000477976.5",\n "source": "Ensembl",\n "bioType": "retained_intron",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n },\n {\n "transcript": "ENST00000496938.1",\n "source": "Ensembl",\n "bioType": "processed_transcript",\n "geneId": "ENSG00000188976",\n "hgnc": "NOC2L",\n "consequence": [\n "downstream_gene_variant"\n ]\n }\n ]\n }\n ]\n}\n]}\n')))}p.isMDXComponent=!0},48978:(n,e,t)=>{t.d(e,{Z:()=>a});const a=t.p+"assets/images/JSON-Layout-fc8e5c0cf4c8428981cd206fe9b6feac.svg"}}]); \ No newline at end of file diff --git a/assets/js/f004b3ca.abcff7e4.js b/assets/js/f004b3ca.abcff7e4.js deleted file mode 100644 index 6e3bde5f..00000000 --- a/assets/js/f004b3ca.abcff7e4.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6374],{3905:(e,n,t)=>{t.d(n,{Zo:()=>p,kt:()=>h});var i=t(67294);function a(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function r(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);n&&(i=i.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,i)}return t}function o(e){for(var n=1;n=0||(a[t]=e[t]);return a}(e,n);if(Object.getOwnPropertySymbols){var 
r=Object.getOwnPropertySymbols(e);for(i=0;i=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(a[t]=e[t])}return a}var s=i.createContext({}),c=function(e){var n=i.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):o(o({},n),e)),t},p=function(e){var n=c(e.components);return i.createElement(s.Provider,{value:n},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var n=e.children;return i.createElement(i.Fragment,{},n)}},d=i.forwardRef((function(e,n){var t=e.components,a=e.mdxType,r=e.originalType,s=e.parentName,p=l(e,["components","mdxType","originalType","parentName"]),u=c(t),d=a,h=u["".concat(s,".").concat(d)]||u[d]||m[d]||r;return t?i.createElement(h,o(o({ref:n},p),{},{components:t})):i.createElement(h,o({ref:n},p))}));function h(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var r=t.length,o=new Array(r);o[0]=d;var l={};for(var s in n)hasOwnProperty.call(n,s)&&(l[s]=n[s]);l.originalType=e,l[u]="string"==typeof e?e:a,o[1]=l;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>o,default:()=>u,frontMatter:()=>r,metadata:()=>l,toc:()=>s});var i=t(87462),a=(t(67294),t(3905));const r={title:"Jasix"},o=void 0,l={unversionedId:"utilities/jasix",id:"version-3.16/utilities/jasix",title:"Jasix",description:"Overview",source:"@site/versioned_docs/version-3.16/utilities/jasix.mdx",sourceDirName:"utilities",slug:"/utilities/jasix",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/utilities/jasix",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/utilities/jasix.mdx",tags:[],version:"3.16",frontMatter:{title:"Jasix"},sidebar:"version-3.16/docs",previous:{title:"Variant IDs",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/core-functionality/variant-ids"}},s=[{value:"Overview",id:"overview",children:[],level:2},{value:"Creating the Jasix index",id:"creating-the-jasix-index",children:[{value:"Example",id:"example",children:[],level:3}],level:2},{value:"Querying the 
index",id:"querying-the-index",children:[],level:2},{value:"Extracting a section",id:"extracting-a-section",children:[],level:2}],c={toc:s},p="wrapper";function u(e){let{components:n,...t}=e;return(0,a.kt)(p,(0,i.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,a.kt)("h2",{id:"overview"},"Overview"),(0,a.kt)("p",null,"The Jasix index is aimed at providing TABIX like indexing capabilities for the Nirvana JSON output."),(0,a.kt)("h2",{id:"creating-the-jasix-index"},"Creating the Jasix index"),(0,a.kt)("p",null,"The Jasix index (that comes in a .jsi) file is generated on-the-fly with Nirvana output. It can also be generated independently by running the Jasix command line utility on the JSON output file. Please note that the Jasix utility can only consume JSON files that follow the Nirvana JSON output format. The following code blocks demonstrate the help menu and index generating functionalities of Jasix."),(0,a.kt)("h3",{id:"example"},"Example"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -h\nUSAGE: dotnet Jasix.dll -i in.json.gz [options]\nIndexes a Nirvana annotated JSON file\n\nOPTIONS:\n --header, -t print also the header lines\n --only-header, -H print only the header lines\n --chromosomes, -l list chromosome names\n --index, -c create index\n --in, -i input\n --out, -o compressed output file name (default:console)\n --query, -q query range\n --section, -s complete section (positions or genes) to output\n --help, -h displays the help menu\n --version, -v displays the version\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll --index -i input.json.gz\n---------------------------------------------------------------------------\nJasix (c) 2017 Illumina, Inc.\nStromberg, Roy, Lajugie, Jiang, Li, and Kang 2.0.0\n---------------------------------------------------------------------------\n\nRef Sequence chrM indexed in 00:00:00.2\nRef Sequence chr1 indexed in 
00:00:05.8\nRef Sequence chr2 indexed in 00:00:06.0\n.\n.\n.\nPeak memory usage: 28.5 MB\nTime: 00:01:14.8\n")),(0,a.kt)("h2",{id:"querying-the-index"},"Querying the index"),(0,a.kt)("p",null,"The Jasix query format is chr:start-end. If not provided, it assumes end=start. If only chr is provided, all entries for that chromosome will be provided."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz chrM:5000-7000\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n }\n ]\n}\n\n')),(0,a.kt)("p",null,'The default output stream is Console. However, if an output filename is provided, Jasix outputs the results to that file in a bgzip compressed format. The output is always a valid JSON entry. If requested (via -t option) the header of the indexed file will be provided. 
Multiple queries can be submitted in the same command and the output will contain them within the same "positions" block in order of the submitted queries (Warning: if the queries are out of order, or overlapping, the output will be out or order and intersecting).'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -q chrM:5000-7000 -q chrM:8500-9500 -t\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'{\n "header":{\n "annotator":"Illumina Annotation Engine 1.6.2.0",\n "creationTime":"2017-08-30 11:42:57",\n "genomeAssembly":"GRCh37",\n "schemaVersion":6,\n "dataVersion":"84.24.39",\n "dataSources":[\n {\n "name":"VEP",\n "version":"84",\n "description":"Ensembl",\n "releaseDate":"2017-01-16"\n }\n ],\n "samples":[\n "Mother"\n ]\n },\n "positions":[\n {\n "chromosome":"chrM",\n "refAllele":"C",\n "position":5581,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "T"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1625,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1625\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"T",\n "refAllele":"C",\n "begin":5581,\n "chromosome":"chrM",\n "end":5581,\n "variantType":"SNV",\n "vid":"MT:5581:T"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"A",\n "position":6267,\n "quality":1637.00,\n "filters":[\n "LowGQXHetSNP"\n ],\n "altAlleles":[\n "G"\n ],\n "samples":[\n {\n "variantFreq":0.6873,\n "totalDepth":323,\n "genotypeQuality":1,\n "alleleDepths":[\n 101,\n 222\n ],\n "genotype":"0/1"\n }\n ],\n "variants":[\n {\n "altAllele":"G",\n "refAllele":"A",\n "begin":6267,\n "chromosome":"chrM",\n "end":6267,\n "variantType":"SNV",\n "vid":"MT:6267:G"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":8702,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":0.9987,\n 
"totalDepth":1534,\n "genotypeQuality":1,\n "alleleDepths":[\n 2,\n 1532\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":8702,\n "chromosome":"chrM",\n "end":8702,\n "variantType":"SNV",\n "vid":"MT:8702:A"\n }\n ]\n },\n {\n "chromosome":"chrM",\n "refAllele":"G",\n "position":9378,\n "quality":3070.00,\n "filters":[\n "LowGQXHomSNP"\n ],\n "altAlleles":[\n "A"\n ],\n "samples":[\n {\n "variantFreq":1,\n "totalDepth":1018,\n "genotypeQuality":1,\n "alleleDepths":[\n 0,\n 1018\n ],\n "genotype":"1/1"\n }\n ],\n "variants":[\n {\n "altAllele":"A",\n "refAllele":"G",\n "begin":9378,\n "chromosome":"chrM",\n "end":9378,\n "variantType":"SNV",\n "vid":"MT:9378:A"\n }\n ]\n }\n ]\n}\n')),(0,a.kt)("h2",{id:"extracting-a-section"},"Extracting a section"),(0,a.kt)("p",null,"The Nirvana JSON file has three sections: header, positions and genes. Header can be printed using the -H option. If you are interested in only the positions or genes section, you can use the -s or --section option."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet Jasix.dll -i input.json.gz -s genes\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'[\n{\n "name": "ABCB10",\n "omim": [\n {\n "mimNumber": 605454,\n "geneName": "ATP-binding cassette, subfamily B, member 10"\n }\n ]\n},\n{\n "name": "ABCD3",\n "omim": [\n {\n "mimNumber": 170995,\n "geneName": "ATP-binding cassette, subfamily D, member 3 (peroxisomal membrane protein 1, 70kD)",\n "description": "The ABCD3 gene encodes a peroxisomal membrane transporter involved in the transport of branched-chain fatty acids and C27 bile acids into the peroxisome; the latter function is a crucial step in bile acid biosynthesis (summary by Ferdinandusse et al., 2015).",\n "phenotypes": [\n {\n "mimNumber": 616278,\n "phenotype": "?Bile acid synthesis defect, congenital, 5",\n "mapping": "molecular basis of the disorder is 
known",\n "inheritances": [\n "Autosomal recessive"\n ],\n "comments": [\n "unconfirmed or possibly spurious mapping"\n ]\n }\n ]\n }\n ]\n}\n]\n')))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f023eebb.179d6276.js b/assets/js/f023eebb.179d6276.js deleted file mode 100644 index 4ff1664e..00000000 --- a/assets/js/f023eebb.179d6276.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2031],{3905:(e,t,n)=>{n.d(t,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var l=r.createContext({}),p=function(e){var t=r.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},s=function(e){var t=p(e.components);return r.createElement(l.Provider,{value:t},e.children)},u="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,s=c(e,["components","mdxType","originalType","parentName"]),u=p(n),d=a,f=u["".concat(l,".").concat(d)]||u[d]||m[d]||o;return n?r.createElement(f,i(i({ref:t},s),{},{components:n})):r.createElement(f,i({ref:t},s))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=d;var c={};for(var l in t)hasOwnProperty.call(t,l)&&(c[l]=t[l]);c.originalType=e,c[u]="string"==typeof e?e:a,i[1]=c;for(var 
p=2;p{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/primate-ai-json",id:"version-3.18/data-sources/primate-ai-json",title:"primate-ai-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/primate-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/primate-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/primate-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/primate-ai-json.md",tags:[],version:"3.18",frontMatter:{}},l=[],p={toc:l},s="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"primateAI":[\n {\n "hgnc":"TP53",\n "scorePercentile":0.3,\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"scorePercentile"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f048ed9e.12166596.js b/assets/js/f048ed9e.12166596.js new file mode 100644 index 00000000..ee2b592c --- /dev/null +++ b/assets/js/f048ed9e.12166596.js @@ -0,0 +1 @@ +"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9962],{3905:(e,n,t)=>{t.d(n,{Zo:()=>d,kt:()=>h});var a=t(7294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function l(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):l(l({},n),e)),t},d=function(e){var n=c(e.components);return a.createElement(s.Provider,{value:n},e.children)},m="mdxType",u={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},p=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,d=r(e,["components","mdxType","originalType","parentName"]),m=c(t),p=i,h=m["".concat(s,".").concat(p)]||m[p]||u[p]||o;return t?a.createElement(h,l(l({ref:n},d),{},{components:t})):a.createElement(h,l({ref:n},d))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var o=t.length,l=new Array(o);l[0]=p;var r={};for(var s in n)hasOwnProperty.call(n,s)&&(r[s]=n[s]);r.originalType=e,r[m]="string"==typeof e?e:i,l[1]=r;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>r,toc:()=>s});var a=t(7462),i=(t(7294),t(3905));const o={title:"Getting Started"},l=void 0,r={unversionedId:"introduction/getting-started",id:"introduction/getting-started",title:"Getting Started",description:"Illumina Connected Annotations is written in C# using .NET Core (an amazing runtime environment that currently runs on 
Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.",source:"@site/docs/introduction/getting-started.md",sourceDirName:"introduction",slug:"/introduction/getting-started",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/introduction/getting-started.md",tags:[],version:"current",frontMatter:{title:"Getting Started"},sidebar:"docs",previous:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies"},next:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes"}},s=[{value:"Getting Illumina Connected Annotations",id:"getting-illumina-connected-annotations",children:[{value:"Latest Release",id:"latest-release",children:[],level:3},{value:"Quick Start",id:"quick-start",children:[],level:3},{value:"Docker",id:"docker",children:[],level:3}],level:2},{value:"Downloading the data files",id:"downloading-the-data-files",children:[],level:2},{value:"Download a test VCF file",id:"download-a-test-vcf-file",children:[],level:2},{value:"Running Illumina Connected Annotations",id:"running-illumina-connected-annotations",children:[],level:2},{value:"The Illumina Connected Annotations command line",id:"the-illumina-connected-annotations-command-line",children:[{value:"Specifying annotation sources",id:"specifying-annotation-sources",children:[],level:3}],level:2}],c={toc:s},d="wrapper";function m(e){let{components:n,...o}=e;return(0,i.kt)(d,(0,a.Z)({},c,o,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"Illumina Connected Annotations is written in C# using ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core")," (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files."),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Illumina Connected Annotations currently uses .NET6.0. Please make sure that you have the most current runtime from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core downloads")," page."))),(0,i.kt)("h2",{id:"getting-illumina-connected-annotations"},"Getting Illumina Connected Annotations"),(0,i.kt)("h3",{id:"latest-release"},"Latest Release"),(0,i.kt)("p",null,"Please visit ",(0,i.kt)("a",{parentName:"p",href:"https://developer.illumina.com/IlluminaConnectedAnnotations"},"Illumina Connected Annotations"),". 
to obtain the latest release."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p IlluminaConnectedAnnotations/Data\ncd IlluminaConnectedAnnotations\nunzip IlluminaConnectedAnnotations-3.18.1-net6.0.zip\n")),(0,i.kt)("h3",{id:"quick-start"},"Quick Start"),(0,i.kt)("p",null,"If you want to get started right away, we've created ",(0,i.kt)("a",{target:"_blank",href:t(172).Z},"a script")," that unzips the Illumina Connected Annotations build, downloads the annotation data, and starts annotating a test file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"bash ./TestIlluminaConnectedAnnotations.sh IlluminaConnectedAnnotationsBuild.zip\n")),(0,i.kt)("p",null,"We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X."),(0,i.kt)("h3",{id:"docker"},"Docker"),(0,i.kt)("p",null,"Obtain the docker image in a zip file (e.g. IlluminaConnectedAnnotations-v3.21.0-net6.0-docker.tar.gz), and load it as follows"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"docker load < IlluminaConnectedAnnotations-v3.21.0-net6.0-docker.tar.gz\n")),(0,i.kt)("p",null,"For Docker, we have special instructions for running the Downloader:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"docker run --rm -it -v local/data/folder:/scratch IlluminaConnectedAnnotations:v3.21.0 Downloader --ga GRCh37 -o /scratch\n")),(0,i.kt)("p",null,"Similarly, we have special instructions for running IlluminaConnectedAnnotations (Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF")," in case you need it):"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"docker run --rm -it -v local/data/folder:/scratch IlluminaConnectedAnnotations:v3.21.0 IlluminaConnectedAnnotations -c /scratch/Cache/ \\\n -r 
/scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n --sd /scratch/SupplementaryAnnotation/GRCh37 \\\n -i /scratch/HiSeq.10000.vcf.gz -o /scratch/HiSeq\n")),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Please note that since our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Illumina Connected Annotations in Docker."))),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 
2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"For convenience, the user is encouraged to create aliases for the docker commands. For example:"),(0,i.kt)("pre",{parentName:"div"},(0,i.kt)("code",{parentName:"pre",className:"language-bash"},'alias IlluminaConnectedAnnotations="docker run --rm -it -v local/data/folder:/scratch IlluminaConnectedAnnotations:v3.21.0 IlluminaConnectedAnnotations"\n')))),(0,i.kt)("h2",{id:"downloading-the-data-files"},"Downloading the data files"),(0,i.kt)("p",null,"To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/net6.0/Downloader.dll \\\n --ga GRCh37 \\\n -o Data\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--ga")," argument specifies the genome assembly which can be ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh37"),", ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh38"),", or ",(0,i.kt)("inlineCode",{parentName:"li"},"both"),"."),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Glitches in the 
Matrix")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. If you see that a file was marked ",(0,i.kt)("inlineCode",{parentName:"p"},"truncated"),", try fixing the root cause and running the downloader again."))),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"From time to time, you can re-run the Downloader to get the latest annotation files. 
It will only download the files that changed."))),(0,i.kt)("h2",{id:"download-a-test-vcf-file"},"Download a test VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/HiSeq.10000.vcf.gz\n")),(0,i.kt)("h2",{id:"running-illumina-connected-annotations"},"Running Illumina Connected Annotations"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet Annotator.dll \\\n -c Data/Cache \\\n --sd Data/SupplementaryAnnotation/GRCh37 \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i HiSeq.10000.vcf.gz \\\n -o HiSeq.10000\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Illumina Connected Annotations, performance metrics are shown as it evaluates each chromosome in the input VCF 
file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nIlluminaConnectedAnnotations (c) 2020 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:00.0\nSA Position Scan 00:00:00.0 153,634\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr1 00:00:00.2 00:00:00.8 11,873\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:00.0 1.5 %\nPreload 00:00:00.2 4.9 %\nAnnotation 00:00:00.8 18.5 %\n\nTime: 00:00:04.4\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"HiSeq.10000.json.gz"),". 
Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/HiSeq.10000.json.gz"},"the full JSON file"),"."),(0,i.kt)("h2",{id:"the-illumina-connected-annotations-command-line"},"The Illumina Connected Annotations command line"),(0,i.kt)("p",null,"The full command line options can be viewed by using the ",(0,i.kt)("inlineCode",{parentName:"p"},"-h")," option or no options"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet Annotator.dll\n---------------------------------------------------------------------------\nIlluminaConnectedAnnotations (c) 2020 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet Annotator.dll -i -c --sd -r -o \nAnnotates a set of variants\n\nOPTIONS:\n --cache, -c \n input cache directory\n --in, -i input VCF path\n --out, -o output file path\n --ref, -r input compressed reference sequence path\n --sd input supplementary annotation directory\n --sources, -s annotation data sources to be used (comma\n separated list of supported tags)\n --force-mt forces to annotate mitochondrial variants\n --legacy-vids enables support for legacy VIDs\n --enable-dq report DQ from VCF samples field\n --enable-bidirectional-fusions\n enables support for bidirectional gene fusions\n --str user provided STR annotation TSV file\n --vcf-info additional vcf info field keys (comma separated)\n desired in the output\n --vcf-sample-info \n additional vcf format field keys (comma separated)\n desired in the output\n --help, -h displays the help menu\n --version, -v displays the version\n\nSupplementary annotation version: 69, Reference version: 7\n")),(0,i.kt)("h3",{id:"specifying-annotation-sources"},"Specifying annotation sources"),(0,i.kt)("p",null,"By default, Illumina Connected Annotations will use all available data sources. 
However, the user can customize the set of sources using the ",(0,i.kt)("inlineCode",{parentName:"p"},"--sources|-s")," option. If an unknown source is specified, a warning message will be printed."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet Annotator.dll \\\n -c Data/Cache/GRCh37 \\\n --sd Data/SupplementaryAnnotation/GRCh37 \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i HiSeq.10000.vcf.gz \\\n -o HiSeq.10000 \\\n -s omim,gnomad,ense\n ---------------------------------------------------------------------------\n IlluminaConnectedAnnotations (c) 2020 Illumina, Inc.\n Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n ---------------------------------------------------------------------------\n\n WARNING: Unknown tag in data-sources: ense.\n Available values are: aminoAcidConservation,primateAI,dbsnp,spliceAI,revel,cosmic,clinvar,gnomad,\n mitomap,oneKg,gmeVariome,topmed,clingen,decipher,gnomAD-preview,clingenDosageSensitivityMap,\n gerpScore,dannScore,omim,clingenGeneValidity,phylopScore,lowComplexityRegion,refMinor,\n heteroplasmy,Ensembl,RefSeq\n\n Initialization Time Positions/s\n ---------------------------------------------------------------------------\n SA Position Scan 00:00:00.3 307,966\n ....\n ..\n")),(0,i.kt)("p",null,"The list of available values is compiled from the files provided (using ",(0,i.kt)("inlineCode",{parentName:"p"},"-c")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"--sd")," options)."))}m.isMDXComponent=!0},172:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/files/TestIlluminaConnectedAnnotations-e26785a7184802763e147e22e2a39eb6.sh"}}]); \ No newline at end of file diff --git a/assets/js/f048ed9e.7b91fe00.js b/assets/js/f048ed9e.7b91fe00.js deleted file mode 100644 index 982f1d9c..00000000 --- a/assets/js/f048ed9e.7b91fe00.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2696],{3905:(e,n,t)=>{t.d(n,{Zo:()=>d,kt:()=>h});var a=t(67294);function i(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function l(e){for(var n=1;n=0||(i[t]=e[t]);return i}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(i[t]=e[t])}return i}var s=a.createContext({}),c=function(e){var n=a.useContext(s),t=n;return e&&(t="function"==typeof e?e(n):l(l({},n),e)),t},d=function(e){var n=c(e.components);return a.createElement(s.Provider,{value:n},e.children)},m="mdxType",u={inlineCode:"code",wrapper:function(e){var n=e.children;return a.createElement(a.Fragment,{},n)}},p=a.forwardRef((function(e,n){var t=e.components,i=e.mdxType,o=e.originalType,s=e.parentName,d=r(e,["components","mdxType","originalType","parentName"]),m=c(t),p=i,h=m["".concat(s,".").concat(p)]||m[p]||u[p]||o;return t?a.createElement(h,l(l({ref:n},d),{},{components:t})):a.createElement(h,l({ref:n},d))}));function h(e,n){var t=arguments,i=n&&n.mdxType;if("string"==typeof e||i){var o=t.length,l=new Array(o);l[0]=p;var r={};for(var s in n)hasOwnProperty.call(n,s)&&(r[s]=n[s]);r.originalType=e,r[m]="string"==typeof e?e:i,l[1]=r;for(var c=2;c{t.r(n),t.d(n,{contentTitle:()=>l,default:()=>m,frontMatter:()=>o,metadata:()=>r,toc:()=>s});var a=t(87462),i=(t(67294),t(3905));const o={title:"Getting Started"},l=void 0,r={unversionedId:"introduction/getting-started",id:"introduction/getting-started",title:"Getting Started",description:"Illumina Connected Annotations is written in C# using .NET Core (an amazing runtime environment that currently runs on 
Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.",source:"@site/docs/introduction/getting-started.md",sourceDirName:"introduction",slug:"/introduction/getting-started",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/introduction/getting-started.md",tags:[],version:"current",frontMatter:{title:"Getting Started"},sidebar:"docs",previous:{title:"Dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies"},next:{title:"1000 Genomes",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes"}},s=[{value:"Getting Illumina Connected Annotations",id:"getting-illumina-connected-annotations",children:[{value:"Latest Release",id:"latest-release",children:[],level:3},{value:"Quick Start",id:"quick-start",children:[],level:3},{value:"Docker",id:"docker",children:[],level:3}],level:2},{value:"Downloading the data files",id:"downloading-the-data-files",children:[],level:2},{value:"Download a test VCF file",id:"download-a-test-vcf-file",children:[],level:2},{value:"Running Illumina Connected Annotations",id:"running-illumina-connected-annotations",children:[],level:2},{value:"The Illumina Connected Annotations command line",id:"the-illumina-connected-annotations-command-line",children:[{value:"Specifying annotation sources",id:"specifying-annotation-sources",children:[],level:3}],level:2}],c={toc:s},d="wrapper";function m(e){let{components:n,...o}=e;return(0,i.kt)(d,(0,a.Z)({},c,o,{components:n,mdxType:"MDXLayout"}),(0,i.kt)("p",null,"Illumina Connected Annotations is written in C# using ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core")," (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). 
Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files."),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Illumina Connected Annotations currently uses .NET6.0. Please make sure that you have the most current runtime from the ",(0,i.kt)("a",{parentName:"p",href:"https://www.microsoft.com/net/download/core"},".NET Core downloads")," page."))),(0,i.kt)("h2",{id:"getting-illumina-connected-annotations"},"Getting Illumina Connected Annotations"),(0,i.kt)("h3",{id:"latest-release"},"Latest Release"),(0,i.kt)("p",null,"Please visit ",(0,i.kt)("a",{parentName:"p",href:"https://developer.illumina.com/IlluminaConnectedAnnotations"},"Illumina Connected Annotations"),". 
to obtain the latest release."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"mkdir -p IlluminaConnectedAnnotations/Data\ncd IlluminaConnectedAnnotations\nunzip IlluminaConnectedAnnotations-3.18.1-net6.0.zip\n")),(0,i.kt)("h3",{id:"quick-start"},"Quick Start"),(0,i.kt)("p",null,"If you want to get started right away, we've created ",(0,i.kt)("a",{target:"_blank",href:t(30172).Z},"a script")," that unzips the Illumina Connected Annotations build, downloads the annotation data, and starts annotating a test file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"bash ./TestIlluminaConnectedAnnotations.sh IlluminaConnectedAnnotationsBuild.zip\n")),(0,i.kt)("p",null,"We have verified that this script works on Windows (using Git Bash or WSL), Linux, and Mac OS X."),(0,i.kt)("h3",{id:"docker"},"Docker"),(0,i.kt)("p",null,"Obtain the docker image in a zip file (e.g. IlluminaConnectedAnnotations-v3.21.0-net6.0-docker.tar.gz), and load it as follows"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"docker load < IlluminaConnectedAnnotations-v3.21.0-net6.0-docker.tar.gz\n")),(0,i.kt)("p",null,"For Docker, we have special instructions for running the Downloader:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"docker run --rm -it -v local/data/folder:/scratch IlluminaConnectedAnnotations:v3.21.0 Downloader --ga GRCh37 -o /scratch\n")),(0,i.kt)("p",null,"Similarly, we have special instructions for running IlluminaConnectedAnnotations (Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF")," in case you need it):"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"docker run --rm -it -v local/data/folder:/scratch IlluminaConnectedAnnotations:v3.21.0 IlluminaConnectedAnnotations -c /scratch/Cache/ \\\n -r 
/scratch/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n --sd /scratch/SupplementaryAnnotation/GRCh37 \\\n -i /scratch/HiSeq.10000.vcf.gz -o /scratch/HiSeq\n")),(0,i.kt)("div",{className:"admonition admonition-caution alert alert--warning"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"16",height:"16",viewBox:"0 0 16 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M8.893 1.5c-.183-.31-.52-.5-.887-.5s-.703.19-.886.5L.138 13.499a.98.98 0 0 0 0 1.001c.193.31.53.501.886.501h13.964c.367 0 .704-.19.877-.5a1.03 1.03 0 0 0 .01-1.002L8.893 1.5zm.133 11.497H6.987v-2.003h2.039v2.003zm0-3.004H6.987V5.987h2.039v4.006z"}))),"caution")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Please note that since our data files are usually accessed through a Docker volume, there is a noticeable performance penalty when running Illumina Connected Annotations in Docker."))),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 
2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"For convenience, the user is encouraged to create aliases for the docker commands. For example:"),(0,i.kt)("pre",{parentName:"div"},(0,i.kt)("code",{parentName:"pre",className:"language-bash"},'alias IlluminaConnectedAnnotations="docker run --rm -it -v local/data/folder:/scratch IlluminaConnectedAnnotations:v3.21.0 IlluminaConnectedAnnotations"\n')))),(0,i.kt)("h2",{id:"downloading-the-data-files"},"Downloading the data files"),(0,i.kt)("p",null,"To download the latest data sources (or update the ones that you already have), use the following command to automate the download from S3:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet bin/Release/net6.0/Downloader.dll \\\n --ga GRCh37 \\\n -o Data\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--ga")," argument specifies the genome assembly which can be ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh37"),", ",(0,i.kt)("inlineCode",{parentName:"li"},"GRCh38"),", or ",(0,i.kt)("inlineCode",{parentName:"li"},"both"),"."),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output directory")),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Glitches in the 
Matrix")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Every once in a while, the download process does not go smoothly. Perhaps the internet connection cut out or you ran out of disk space. The Downloader attempts to detect these situations by checking the file sizes at the very end. If you see that a file was marked ",(0,i.kt)("inlineCode",{parentName:"p"},"truncated"),", try fixing the root cause and running the downloader again."))),(0,i.kt)("div",{className:"admonition admonition-tip alert alert--success"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"12",height:"16",viewBox:"0 0 12 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M6.5 0C3.48 0 1 2.19 1 5c0 .92.55 2.25 1 3 1.34 2.25 1.78 2.78 2 4v1h5v-1c.22-1.22.66-1.75 2-4 .45-.75 1-2.08 1-3 0-2.81-2.48-5-5.5-5zm3.64 7.48c-.25.44-.47.8-.67 1.11-.86 1.41-1.25 2.06-1.45 3.23-.02.05-.02.11-.02.17H5c0-.06 0-.13-.02-.17-.2-1.17-.59-1.83-1.45-3.23-.2-.31-.42-.67-.67-1.11C2.44 6.78 2 5.65 2 5c0-2.2 2.02-4 4.5-4 1.22 0 2.36.42 3.22 1.19C10.55 2.94 11 3.94 11 5c0 .66-.44 1.78-.86 2.48zM4 14h5c-.23 1.14-1.3 2-2.5 2s-2.27-.86-2.5-2z"}))),"tip")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"From time to time, you can re-run the Downloader to get the latest annotation files. 
It will only download the files that changed."))),(0,i.kt)("h2",{id:"download-a-test-vcf-file"},"Download a test VCF file"),(0,i.kt)("p",null,"Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/HiSeq.10000.vcf.gz"},"a toy VCF file")," you can play around with:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"curl -O https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/HiSeq.10000.vcf.gz\n")),(0,i.kt)("h2",{id:"running-illumina-connected-annotations"},"Running Illumina Connected Annotations"),(0,i.kt)("p",null,"Once you have downloaded the data sets, use the following command to annotate your VCF:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet Annotator.dll \\\n -c Data/Cache \\\n --sd Data/SupplementaryAnnotation/GRCh37 \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i HiSeq.10000.vcf.gz \\\n -o HiSeq.10000\n")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-c")," argument specifies the cache directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"--sd")," argument specifies the supplementary annotation directory"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-r")," argument specifies the compressed reference path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-i")," argument specifies the input VCF path"),(0,i.kt)("li",{parentName:"ul"},"the ",(0,i.kt)("inlineCode",{parentName:"li"},"-o")," argument specifies the output filename prefix")),(0,i.kt)("p",null,"When running Illumina Connected Annotations, performance metrics are shown as it evaluates each chromosome in the input VCF 
file:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"---------------------------------------------------------------------------\nIlluminaConnectedAnnotations (c) 2020 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nInitialization Time Positions/s\n---------------------------------------------------------------------------\nCache 00:00:00.0\nSA Position Scan 00:00:00.0 153,634\n\nReference Preload Annotation Variants/s\n---------------------------------------------------------------------------\nchr1 00:00:00.2 00:00:00.8 11,873\n\nSummary Time Percent\n---------------------------------------------------------------------------\nInitialization 00:00:00.0 1.5 %\nPreload 00:00:00.2 4.9 %\nAnnotation 00:00:00.8 18.5 %\n\nTime: 00:00:04.4\n")),(0,i.kt)("p",null,"The output will be a JSON file called ",(0,i.kt)("inlineCode",{parentName:"p"},"HiSeq.10000.json.gz"),". 
Here's ",(0,i.kt)("a",{parentName:"p",href:"https://illumina.github.io/IlluminaConnectedAnnotationsDocumentation/files/HiSeq.10000.json.gz"},"the full JSON file"),"."),(0,i.kt)("h2",{id:"the-illumina-connected-annotations-command-line"},"The Illumina Connected Annotations command line"),(0,i.kt)("p",null,"The full command line options can be viewed by using the ",(0,i.kt)("inlineCode",{parentName:"p"},"-h")," option or no options"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet Annotator.dll\n---------------------------------------------------------------------------\nIlluminaConnectedAnnotations (c) 2020 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n---------------------------------------------------------------------------\n\nUSAGE: dotnet Annotator.dll -i -c --sd -r -o \nAnnotates a set of variants\n\nOPTIONS:\n --cache, -c \n input cache directory\n --in, -i input VCF path\n --out, -o output file path\n --ref, -r input compressed reference sequence path\n --sd input supplementary annotation directory\n --sources, -s annotation data sources to be used (comma\n separated list of supported tags)\n --force-mt forces to annotate mitochondrial variants\n --legacy-vids enables support for legacy VIDs\n --enable-dq report DQ from VCF samples field\n --enable-bidirectional-fusions\n enables support for bidirectional gene fusions\n --str user provided STR annotation TSV file\n --vcf-info additional vcf info field keys (comma separated)\n desired in the output\n --vcf-sample-info \n additional vcf format field keys (comma separated)\n desired in the output\n --help, -h displays the help menu\n --version, -v displays the version\n\nSupplementary annotation version: 69, Reference version: 7\n")),(0,i.kt)("h3",{id:"specifying-annotation-sources"},"Specifying annotation sources"),(0,i.kt)("p",null,"By default, Illumina Connected Annotations will use all available data sources. 
However, the user can customize the set of sources using the ",(0,i.kt)("inlineCode",{parentName:"p"},"--sources|-s")," option. If an unknown source is specified, a warning message will be printed."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-bash"},"dotnet Annotator.dll \\\n -c Data/Cache/GRCh37 \\\n --sd Data/SupplementaryAnnotation/GRCh37 \\\n -r Data/References/Homo_sapiens.GRCh37.Nirvana.dat \\\n -i HiSeq.10000.vcf.gz \\\n -o HiSeq.10000 \\\n -s omim,gnomad,ense\n ---------------------------------------------------------------------------\n IlluminaConnectedAnnotations (c) 2020 Illumina, Inc.\n Stromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0\n ---------------------------------------------------------------------------\n\n WARNING: Unknown tag in data-sources: ense.\n Available values are: aminoAcidConservation,primateAI,dbsnp,spliceAI,revel,cosmic,clinvar,gnomad,\n mitomap,oneKg,gmeVariome,topmed,clingen,decipher,gnomAD-preview,clingenDosageSensitivityMap,\n gerpScore,dannScore,omim,clingenGeneValidity,phylopScore,lowComplexityRegion,refMinor,\n heteroplasmy,Ensembl,RefSeq\n\n Initialization Time Positions/s\n ---------------------------------------------------------------------------\n SA Position Scan 00:00:00.3 307,966\n ....\n ..\n")),(0,i.kt)("p",null,"The list of available values is compiled from the files provided (using ",(0,i.kt)("inlineCode",{parentName:"p"},"-c")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"--sd")," options)."))}m.isMDXComponent=!0},30172:(e,n,t)=>{t.d(n,{Z:()=>a});const a=t.p+"assets/files/TestIlluminaConnectedAnnotations-e26785a7184802763e147e22e2a39eb6.sh"}}]); \ No newline at end of file diff --git a/assets/js/f10eb61d.ad246840.js b/assets/js/f10eb61d.ad246840.js deleted file mode 100644 index 318958bd..00000000 --- a/assets/js/f10eb61d.ad246840.js +++ /dev/null @@ -1 +0,0 @@ -"use 
strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1493],{50677:e=>{e.exports=JSON.parse('{"pluginId":"default","version":"3.17","label":"3.17","banner":"unmaintained","badge":true,"className":"docs-version-3.17","isLast":false,"docsSidebars":{"version-3.17/docs":[{"type":"category","label":"Introduction","items":[{"type":"link","label":"Introduction","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/","docId":"introduction/introduction"},{"type":"link","label":"Dependencies","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/dependencies","docId":"introduction/dependencies"},{"type":"link","label":"Getting Started","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/getting-started","docId":"introduction/getting-started"},{"type":"link","label":"Parsing Nirvana JSON","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/parsing-json","docId":"introduction/parsing-json"},{"type":"link","label":"Annotating COVID-19","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/introduction/covid19","docId":"introduction/covid19"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Data Sources","items":[{"type":"link","label":"1000 Genomes","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/1000Genomes","docId":"data-sources/1000Genomes"},{"type":"link","label":"Amino Acid 
Conservation","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/amino-acid-conservation","docId":"data-sources/amino-acid-conservation"},{"type":"link","label":"ClinGen","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clingen","docId":"data-sources/clingen"},{"type":"link","label":"ClinVar","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/clinvar","docId":"data-sources/clinvar"},{"type":"link","label":"COSMIC","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/cosmic","docId":"data-sources/cosmic"},{"type":"link","label":"dbSNP","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/dbsnp","docId":"data-sources/dbsnp"},{"type":"link","label":"FusionCatcher","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/fusioncatcher","docId":"data-sources/fusioncatcher"},{"type":"link","label":"gnomAD","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/gnomad","docId":"data-sources/gnomad"},{"type":"link","label":"Mitochondrial Heteroplasmy","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mito-heteroplasmy","docId":"data-sources/mito-heteroplasmy"},{"type":"link","label":"MITOMAP","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mitomap","docId":"data-sources/mitomap"},{"type":"link","label":"OMIM","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/omim","docId":"data-sources/omim"},{"type":"link","label":"PhyloP","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/phylop","docId":"data-sources/phylop"},{"type":"link","label":"Primate AI","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/primate-ai","docId":"data-sources/primate-ai"},{"type":"link","label":"REVEL","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/revel","docId":"data-sources/revel"},{"type":"link","label":"Splice 
AI","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/splice-ai","docId":"data-sources/splice-ai"},{"type":"link","label":"TOPMed","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/topmed","docId":"data-sources/topmed"}],"collapsible":true,"collapsed":true},{"type":"category","label":"File Formats","items":[{"type":"link","label":"Nirvana JSON File Format","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/file-formats/nirvana-json-file-format","docId":"file-formats/nirvana-json-file-format"},{"type":"link","label":"Custom Annotations","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/file-formats/custom-annotations","docId":"file-formats/custom-annotations"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Core Functionality","items":[{"type":"link","label":"Canonical Transcripts","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/canonical-transcripts","docId":"core-functionality/canonical-transcripts"},{"type":"link","label":"Gene Fusion Detection","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/gene-fusions","docId":"core-functionality/gene-fusions"},{"type":"link","label":"MNV Recomposition","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/mnv-recomposition","docId":"core-functionality/mnv-recomposition"},{"type":"link","label":"Variant IDs","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/core-functionality/variant-ids","docId":"core-functionality/variant-ids"}],"collapsible":true,"collapsed":true},{"type":"category","label":"Utilities","items":[{"type":"link","label":"Jasix","href":"/IlluminaConnectedAnnotationsDocumentation/3.17/utilities/jasix","docId":"utilities/jasix"}],"collapsible":true,"collapsed":true}]},"docs":{"core-functionality/canonical-transcripts":{"id":"core-functionality/canonical-transcripts","title":"Canonical 
Transcripts","description":"Overview","sidebar":"version-3.17/docs"},"core-functionality/gene-fusions":{"id":"core-functionality/gene-fusions","title":"Gene Fusion Detection","description":"Overview","sidebar":"version-3.17/docs"},"core-functionality/mnv-recomposition":{"id":"core-functionality/mnv-recomposition","title":"MNV Recomposition","description":"Overview","sidebar":"version-3.17/docs"},"core-functionality/variant-ids":{"id":"core-functionality/variant-ids","title":"Variant IDs","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/1000Genomes":{"id":"data-sources/1000Genomes","title":"1000 Genomes","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/1000Genomes-snv-json":{"id":"data-sources/1000Genomes-snv-json","title":"1000Genomes-snv-json","description":"| Field | Type | Notes |"},"data-sources/1000Genomes-sv-json":{"id":"data-sources/1000Genomes-sv-json","title":"1000Genomes-sv-json","description":"| Field | Type | Notes |"},"data-sources/amino-acid-conservation":{"id":"data-sources/amino-acid-conservation","title":"Amino Acid Conservation","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/amino-acid-conservation-json":{"id":"data-sources/amino-acid-conservation-json","title":"amino-acid-conservation-json","description":"| Field | Type | Notes |"},"data-sources/clingen":{"id":"data-sources/clingen","title":"ClinGen","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/clingen-dosage-json":{"id":"data-sources/clingen-dosage-json","title":"clingen-dosage-json","description":"| Field | Type | Notes |"},"data-sources/clingen-gene-validity-json":{"id":"data-sources/clingen-gene-validity-json","title":"clingen-gene-validity-json","description":"| Field | Type | Notes |"},"data-sources/clingen-json":{"id":"data-sources/clingen-json","title":"clingen-json","description":"| Field | Type | Notes 
|"},"data-sources/clinvar":{"id":"data-sources/clinvar","title":"ClinVar","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/clinvar-json":{"id":"data-sources/clinvar-json","title":"clinvar-json","description":"| Field | Type | Notes |"},"data-sources/cosmic":{"id":"data-sources/cosmic","title":"COSMIC","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/cosmic-json":{"id":"data-sources/cosmic-json","title":"cosmic-json","description":"| Field | Type | Notes |"},"data-sources/dbsnp":{"id":"data-sources/dbsnp","title":"dbSNP","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/dbsnp-json":{"id":"data-sources/dbsnp-json","title":"dbsnp-json","description":"| Field | Type | Notes |"},"data-sources/fusioncatcher":{"id":"data-sources/fusioncatcher","title":"FusionCatcher","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/fusioncatcher-json":{"id":"data-sources/fusioncatcher-json","title":"fusioncatcher-json","description":"| Field | Type | Notes |"},"data-sources/gnomad":{"id":"data-sources/gnomad","title":"gnomAD","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/gnomad-lof-json":{"id":"data-sources/gnomad-lof-json","title":"gnomad-lof-json","description":"| Field | Type | Notes |"},"data-sources/gnomad-small-variants-json":{"id":"data-sources/gnomad-small-variants-json","title":"gnomad-small-variants-json","description":"| Field | Type | Notes |"},"data-sources/mito-heteroplasmy":{"id":"data-sources/mito-heteroplasmy","title":"Mitochondrial Heteroplasmy","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/mitomap":{"id":"data-sources/mitomap","title":"MITOMAP","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/mitomap-small-variants-json":{"id":"data-sources/mitomap-small-variants-json","title":"mitomap-small-variants-json","description":"| Field | Type | Notes 
|"},"data-sources/mitomap-structural-variants-json":{"id":"data-sources/mitomap-structural-variants-json","title":"mitomap-structural-variants-json","description":"| Field | Type | Notes |"},"data-sources/omim":{"id":"data-sources/omim","title":"OMIM","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/omim-json":{"id":"data-sources/omim-json","title":"omim-json","description":"| Field | Type | Notes |"},"data-sources/phylop":{"id":"data-sources/phylop","title":"PhyloP","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/phylop-json":{"id":"data-sources/phylop-json","title":"phylop-json","description":"| Field | Type | Notes |"},"data-sources/primate-ai":{"id":"data-sources/primate-ai","title":"Primate AI","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/primate-ai-json":{"id":"data-sources/primate-ai-json","title":"primate-ai-json","description":"| Field | Type | Notes |"},"data-sources/revel":{"id":"data-sources/revel","title":"REVEL","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/revel-json":{"id":"data-sources/revel-json","title":"revel-json","description":"| Field | Type | Notes |"},"data-sources/splice-ai":{"id":"data-sources/splice-ai","title":"Splice AI","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/splice-ai-json":{"id":"data-sources/splice-ai-json","title":"splice-ai-json","description":"| Field | Type | Notes |"},"data-sources/topmed":{"id":"data-sources/topmed","title":"TOPMed","description":"Overview","sidebar":"version-3.17/docs"},"data-sources/topmed-json":{"id":"data-sources/topmed-json","title":"topmed-json","description":"| Field | Type | Notes |"},"file-formats/custom-annotations":{"id":"file-formats/custom-annotations","title":"Custom Annotations","description":"Overview","sidebar":"version-3.17/docs"},"file-formats/nirvana-json-file-format":{"id":"file-formats/nirvana-json-file-format","title":"Nirvana JSON File 
Format","description":"Overview","sidebar":"version-3.17/docs"},"introduction/covid19":{"id":"introduction/covid19","title":"Annotating COVID-19","description":"The Nirvana development team is mainly focused on providing annotations for the human genome. This focus allows us to maximize our resources towards understanding human health.","sidebar":"version-3.17/docs"},"introduction/dependencies":{"id":"introduction/dependencies","title":"Dependencies","description":"All of the following dependencies have been included in this repository.","sidebar":"version-3.17/docs"},"introduction/getting-started":{"id":"introduction/getting-started","title":"Getting Started","description":"Nirvana is written in C# using .NET Core (an amazing runtime environment that currently runs on Windows, Linux, Mac OS X, and in Docker images). Once .NET Core has been downloaded, all you need to do is grab the source, compile it, and grab the data files.","sidebar":"version-3.17/docs"},"introduction/introduction":{"id":"introduction/introduction","title":"Introduction","description":"Clinical-grade variant annotation","sidebar":"version-3.17/docs"},"introduction/parsing-json":{"id":"introduction/parsing-json","title":"Parsing Nirvana JSON","description":"Why JSON?","sidebar":"version-3.17/docs"},"utilities/jasix":{"id":"utilities/jasix","title":"Jasix","description":"Overview","sidebar":"version-3.17/docs"}}}')}}]); \ No newline at end of file diff --git a/assets/js/f13f62c0.021e064a.js b/assets/js/f13f62c0.021e064a.js deleted file mode 100644 index d51db7ce..00000000 --- a/assets/js/f13f62c0.021e064a.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6882],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>u});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var 
r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var c=r.createContext({}),s=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},p=function(e){var t=s(e.components);return r.createElement(c.Provider,{value:t},e.children)},d="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},g=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,c=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),d=s(n),g=a,u=d["".concat(c,".").concat(g)]||d[g]||m[g]||o;return n?r.createElement(u,l(l({ref:t},p),{},{components:n})):r.createElement(u,l({ref:t},p))}));function u(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=g;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[d]="string"==typeof e?e:a,l[1]=i;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>d,frontMatter:()=>o,metadata:()=>i,toc:()=>c});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 0,i={unversionedId:"data-sources/fusioncatcher-json",id:"version-3.18/data-sources/fusioncatcher-json",title:"fusioncatcher-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.18/data-sources/fusioncatcher-json.md",sourceDirName:"data-sources",slug:"/data-sources/fusioncatcher-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/fusioncatcher-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/fusioncatcher-json.md",tags:[],version:"3.18",frontMatter:{}},c=[{value:"genes",id:"genes",children:[],level:4},{value:"gene",id:"gene",children:[],level:4}],s={toc:c},p="wrapper";function d(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},' "fusionCatcher":[\n {\n "genes":{\n "first":{\n "hgnc":"ETV6",\n "isOncogene":true\n },\n "second":{\n "hgnc":"RUNX1"\n },\n "isParalogPair":true,\n "isPseudogenePair":true,\n "isReadthrough":true\n },\n "germlineSources":[\n "1000 Genomes Project"\n ],\n "somaticSources":[\n "COSMIC",\n "TCGA oesophageal carcinomas"\n ]\n }\n ]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"genes"),(0,a.kt)("td",{parentName:"tr",align:"center"},"genes object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene & 3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"germlineSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known germline data sources")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"somaticSources"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"matches in known somatic data sources")))),(0,a.kt)("h4",{id:"genes"},"genes"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"first"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"5' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"second"),(0,a.kt)("td",{parentName:"tr",align:"center"},"gene object"),(0,a.kt)("td",{parentName:"tr",align:"left"},"3' gene")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isParalogPair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are paralogs for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isPseudogenePair"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when both genes are pseudogenes for each other")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isReadthrough"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this fusion gene is a readthrough event (both are on the same strand and there are no genes between 
them)")))),(0,a.kt)("h4",{id:"gene"},"gene"),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"gene symbol. e.g. MSH6")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"isOncogene"),(0,a.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,a.kt)("td",{parentName:"tr",align:"left"},"true when this gene is an oncogene")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f1773292.76f6f662.js b/assets/js/f1773292.76f6f662.js deleted file mode 100644 index 9569f05d..00000000 --- a/assets/js/f1773292.76f6f662.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6831],{3905:(t,e,n)=>{n.d(e,{Zo:()=>c,kt:()=>k});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),d=function(t){var e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},c=function(t){var e=d(t.components);return 
r.createElement(p.Provider,{value:e},t.children)},m="mdxType",s={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,i=t.originalType,p=t.parentName,c=l(t,["components","mdxType","originalType","parentName"]),m=d(n),u=a,k=m["".concat(p,".").concat(u)]||m[u]||s[u]||i;return n?r.createElement(k,o(o({ref:e},c),{},{components:n})):r.createElement(k,o({ref:e},c))}));function k(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var i=n.length,o=new Array(i);o[0]=u;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[m]="string"==typeof t?t:a,o[1]=l;for(var d=2;d{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const i={title:"Dependencies"},o=void 0,l={unversionedId:"introduction/dependencies",id:"version-3.14/introduction/dependencies",title:"Dependencies",description:"All of the following dependencies have been included in this repository.",source:"@site/versioned_docs/version-3.14/introduction/dependencies.md",sourceDirName:"introduction",slug:"/introduction/dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/dependencies",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.14/introduction/dependencies.md",tags:[],version:"3.14",frontMatter:{title:"Dependencies"},sidebar:"version-3.14/docs",previous:{title:"Introduction",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/"},next:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.14/introduction/getting-started"}},p=[],d={toc:p},c="wrapper";function m(t){let{components:e,...n}=t;return(0,a.kt)(c,(0,r.Z)({},d,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("p",null,"All of the following dependencies have been included in this 
repository."),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Name"),(0,a.kt)("th",{parentName:"tr",align:"center"},"License"),(0,a.kt)("th",{parentName:"tr",align:null},"Usage"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-extensions-for-dotnet-cli"},"Amazon.Lambda")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS extensions for .NET CLI")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-sdk-net/"},"AWSSDK")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS Lambda, S3, SNS support")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://www.newtonsoft.com/json"},"Json.NET")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"JASIX utility")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/ebiggers/libdeflate"},"libdeflate")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/moq/moq4"},"Moq")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"Mocking framework for unit 
tests")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"http://www.ndesk.org/Options"},"NDesk.Options")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT/X11"),(0,a.kt)("td",{parentName:"tr",align:null},"CommandLine library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/xunit/xunit"},"xUnit")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"Unit testing framework")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/Dead2/zlib-ng"},"zlib-ng")),(0,a.kt)("td",{parentName:"tr",align:"center"},"zlib"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/facebook/zstd"},"zstd")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f2592a37.909c2969.js b/assets/js/f2592a37.909c2969.js deleted file mode 100644 index 294d0e63..00000000 --- a/assets/js/f2592a37.909c2969.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8823],{3905:(t,e,r)=>{r.d(e,{Zo:()=>m,kt:()=>f});var n=r(67294);function a(t,e,r){return e in t?Object.defineProperty(t,e,{value:r,enumerable:!0,configurable:!0,writable:!0}):t[e]=r,t}function o(t,e){var r=Object.keys(t);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(t);e&&(n=n.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),r.push.apply(r,n)}return r}function i(t){for(var e=1;e=0||(a[r]=t[r]);return 
a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(t,r)&&(a[r]=t[r])}return a}var p=n.createContext({}),c=function(t){var e=n.useContext(p),r=e;return t&&(r="function"==typeof t?t(e):i(i({},e),t)),r},m=function(t){var e=c(t.components);return n.createElement(p.Provider,{value:e},t.children)},s="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return n.createElement(n.Fragment,{},e)}},d=n.forwardRef((function(t,e){var r=t.components,a=t.mdxType,o=t.originalType,p=t.parentName,m=l(t,["components","mdxType","originalType","parentName"]),s=c(r),d=a,f=s["".concat(p,".").concat(d)]||s[d]||u[d]||o;return r?n.createElement(f,i(i({ref:e},m),{},{components:r})):n.createElement(f,i({ref:e},m))}));function f(t,e){var r=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=r.length,i=new Array(o);i[0]=d;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[s]="string"==typeof t?t:a,i[1]=l;for(var c=2;c{r.r(e),r.d(e,{contentTitle:()=>i,default:()=>s,frontMatter:()=>o,metadata:()=>l,toc:()=>p});var n=r(87462),a=(r(67294),r(3905));const o={},i=void 0,l={unversionedId:"data-sources/mitomap-structural-variants-json",id:"version-3.17/data-sources/mitomap-structural-variants-json",title:"mitomap-structural-variants-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.17/data-sources/mitomap-structural-variants-json.md",sourceDirName:"data-sources",slug:"/data-sources/mitomap-structural-variants-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.17/data-sources/mitomap-structural-variants-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.17/data-sources/mitomap-structural-variants-json.md",tags:[],version:"3.17",frontMatter:{}},p=[],c={toc:p},m="wrapper";function 
s(t){let{components:e,...r}=t;return(0,a.kt)(m,(0,n.Z)({},c,r,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"mitomap":[ \n { \n "chromosome":"MT",\n "begin":3166,\n "end":14152,\n "variantType":"deletion",\n "reciprocalOverlap":0.18068,\n "annotationOverlap":0.42405\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"chromosome"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"begin"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"end"),(0,a.kt)("td",{parentName:"tr",align:"center"},"integer"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"variantType"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"})),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"reciprocalOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. Specified up to 5 decimal places")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"annotationOverlap"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"Range: 0 - 1. 
Specified up to 5 decimal places")))))}s.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f262a5f6.643471a1.js b/assets/js/f262a5f6.643471a1.js new file mode 100644 index 00000000..bd73ca3a --- /dev/null +++ b/assets/js/f262a5f6.643471a1.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6969,5702],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>h});var a=n(7294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var s=a.createContext({}),u=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=u(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=u(n),m=r,h=d["".concat(s,".").concat(m)]||d[m]||p[m]||o;return n?a.createElement(h,i(i({ref:t},c),{},{components:n})):a.createElement(h,i({ref:t},c))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,i[1]=l;for(var u=2;u{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>d,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var 
a=n(7462),r=(n(7294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/gerp-json",id:"data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gerp-json.md",tags:[],version:"current",frontMatter:{}},s=[],u={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 1.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to +\u221e")))))}d.isMDXComponent=!0},1969:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>u});var a=n(7462),r=(n(7294),n(3905)),o=n(5538);const i={title:"GERP"},l=void 
0,s={unversionedId:"data-sources/gerp",id:"data-sources/gerp",title:"GERP",description:"Overview",source:"@site/docs/data-sources/gerp.mdx",sourceDirName:"data-sources",slug:"/data-sources/gerp",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gerp.mdx",tags:[],version:"current",frontMatter:{title:"GERP"},sidebar:"docs",previous:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher"},next:{title:"GME Variome",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme"}},u=[{value:"Overview",id:"overview",children:[],level:2},{value:"Source Files",id:"source-files",children:[{value:"Example GRCh37",id:"example-grch37",children:[],level:3},{value:"Example GRCh38",id:"example-grch38",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[{value:"GRCh37",id:"grch37",children:[],level:3},{value:"GRCh38",id:"grch38",children:[],level:3}],level:2},{value:"JSON Output",id:"json-output",children:[],level:2}],c={toc:u},d="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"GERP identifies constrained elements in multiple alignments by quantifying substitution deficits.\nThese deficits represent substitutions that would have occurred if the element were neutral DNA, but did not occur because the element has been under functional constraint (Rejected Substitutions).\nIllumina Connected Annotations uses GERP++ which is based on a significantly faster and more statistically robust maximum likelihood estimation procedure to compute expected rates of evolution."),(0,r.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},'Davydov, Eugene V., et al. "Identifying a high fraction of the human genome to be under selective constraint using GERP++." ',(0,r.kt)("em",{parentName:"p"},"PLoS computational biology")," ",(0,r.kt)("strong",{parentName:"p"},"6.12")," e1001025 (2010). ",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1371/journal.pcbi.1001025"},"https://doi.org/10.1371/journal.pcbi.1001025")))),(0,r.kt)("h2",{id:"source-files"},"Source Files"),(0,r.kt)("h3",{id:"example-grch37"},"Example GRCh37"),(0,r.kt)("p",null,"GRCh37 file is a TSV format"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr position GERP\n1 12177 0.83\n1 12178 -0.206\n1 12179 -0.492\n1 12180 -1.66\n1 12181 0.83\n1 12182 0.83\n1 12183 -0.417\n1 12184 0.83\n")),(0,r.kt)("h3",{id:"example-grch38"},"Example GRCh38"),(0,r.kt)("p",null,"GRCh38 file is a lift-over BED format"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr pos_start pos_end GERP\n1 12646 12647 0.298\n1 12647 12648 2.63\n1 12648 12649 1.87\n1 12649 12650 0.252\n1 12650 12651 -2.06\n1 12651 12652 2.61\n1 12652 12653 3.97\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we are interested in 
columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"position")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"GERP"))),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("p",null,"None"),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("h3",{id:"grch37"},"GRCh37"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html"},"http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html")),(0,r.kt)("h3",{id:"grch38"},"GRCh38"),(0,r.kt)("p",null,"The data is not available for GRCh38 on GERP++ website, and was obtained from ",(0,r.kt)("a",{parentName:"p",href:"https://personal.broadinstitute.org/konradk/loftee_data/GRCh38/"},"https://personal.broadinstitute.org/konradk/loftee_data/GRCh38/")),(0,r.kt)("h2",{id:"json-output"},"JSON Output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f262a5f6.cd1b9d99.js b/assets/js/f262a5f6.cd1b9d99.js deleted file mode 100644 index 16f8588d..00000000 --- a/assets/js/f262a5f6.cd1b9d99.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[6969,5702],{3905:(e,t,n)=>{n.d(t,{Zo:()=>c,kt:()=>h});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var 
s=a.createContext({}),u=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},c=function(e){var t=u(e.components);return a.createElement(s.Provider,{value:t},e.children)},d="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),d=u(n),m=r,h=d["".concat(s,".").concat(m)]||d[m]||p[m]||o;return n?a.createElement(h,i(i({ref:t},c),{},{components:n})):a.createElement(h,i({ref:t},c))}));function h(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[d]="string"==typeof e?e:r,i[1]=l;for(var u=2;u{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>d,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var a=n(87462),r=(n(67294),n(3905));const o={},i=void 0,l={unversionedId:"data-sources/gerp-json",id:"data-sources/gerp-json",title:"gerp-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gerp-json.md",sourceDirName:"data-sources",slug:"/data-sources/gerp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gerp-json.md",tags:[],version:"current",frontMatter:{}},s=[],u={toc:s},c="wrapper";function d(e){let{components:t,...n}=e;return(0,r.kt)(c,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"gerpScore": 
1.27\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:null},"Field"),(0,r.kt)("th",{parentName:"tr",align:null},"Type"),(0,r.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:null},"gerpScore"),(0,r.kt)("td",{parentName:"tr",align:null},"float"),(0,r.kt)("td",{parentName:"tr",align:null},"Range: -\u221e to +\u221e")))))}d.isMDXComponent=!0},21969:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>u});var a=n(87462),r=(n(67294),n(3905)),o=n(65538);const i={title:"GERP"},l=void 0,s={unversionedId:"data-sources/gerp",id:"data-sources/gerp",title:"GERP",description:"Overview",source:"@site/docs/data-sources/gerp.mdx",sourceDirName:"data-sources",slug:"/data-sources/gerp",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gerp.mdx",tags:[],version:"current",frontMatter:{title:"GERP"},sidebar:"docs",previous:{title:"FusionCatcher",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher"},next:{title:"GME Variome",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme"}},u=[{value:"Overview",id:"overview",children:[],level:2},{value:"Source Files",id:"source-files",children:[{value:"Example GRCh37",id:"example-grch37",children:[],level:3},{value:"Example GRCh38",id:"example-grch38",children:[],level:3},{value:"Parsing",id:"parsing",children:[],level:3}],level:2},{value:"Known Issues",id:"known-issues",children:[],level:2},{value:"Download URL",id:"download-url",children:[{value:"GRCh37",id:"grch37",children:[],level:3},{value:"GRCh38",id:"grch38",children:[],level:3}],level:2},{value:"JSON 
Output",id:"json-output",children:[],level:2}],c={toc:u},d="wrapper";function p(e){let{components:t,...n}=e;return(0,r.kt)(d,(0,a.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h2",{id:"overview"},"Overview"),(0,r.kt)("p",null,"GERP identifies constrained elements in multiple alignments by quantifying substitution deficits.\nThese deficits represent substitutions that would have occurred if the element were neutral DNA, but did not occur because the element has been under functional constraint (Rejected Substitutions).\nIllumina Connected Annotations uses GERP++ which is based on a significantly faster and more statistically robust maximum likelihood estimation procedure to compute expected rates of evolution."),(0,r.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,r.kt)("div",{parentName:"div",className:"admonition-heading"},(0,r.kt)("h5",{parentName:"div"},(0,r.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,r.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,r.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,r.kt)("div",{parentName:"div",className:"admonition-content"},(0,r.kt)("p",{parentName:"div"},'Davydov, Eugene V., et al. "Identifying a high fraction of the human genome to be under selective constraint using GERP++." ',(0,r.kt)("em",{parentName:"p"},"PLoS computational biology")," ",(0,r.kt)("strong",{parentName:"p"},"6.12")," e1001025 (2010). 
",(0,r.kt)("a",{parentName:"p",href:"https://doi.org/10.1371/journal.pcbi.1001025"},"https://doi.org/10.1371/journal.pcbi.1001025")))),(0,r.kt)("h2",{id:"source-files"},"Source Files"),(0,r.kt)("h3",{id:"example-grch37"},"Example GRCh37"),(0,r.kt)("p",null,"GRCh37 file is a TSV format"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr position GERP\n1 12177 0.83\n1 12178 -0.206\n1 12179 -0.492\n1 12180 -1.66\n1 12181 0.83\n1 12182 0.83\n1 12183 -0.417\n1 12184 0.83\n")),(0,r.kt)("h3",{id:"example-grch38"},"Example GRCh38"),(0,r.kt)("p",null,"GRCh38 file is a lift-over BED format"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-tsv"},"chr pos_start pos_end GERP\n1 12646 12647 0.298\n1 12647 12648 2.63\n1 12648 12649 1.87\n1 12649 12650 0.252\n1 12650 12651 -2.06\n1 12651 12652 2.61\n1 12652 12653 3.97\n")),(0,r.kt)("h3",{id:"parsing"},"Parsing"),(0,r.kt)("p",null,"From the CSV file, we are interested in columns:"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"chr")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"position")),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("inlineCode",{parentName:"li"},"GERP"))),(0,r.kt)("h2",{id:"known-issues"},"Known Issues"),(0,r.kt)("p",null,"None"),(0,r.kt)("h2",{id:"download-url"},"Download URL"),(0,r.kt)("h3",{id:"grch37"},"GRCh37"),(0,r.kt)("p",null,(0,r.kt)("a",{parentName:"p",href:"http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html"},"http://mendel.stanford.edu/SidowLab/downloads/gerp/index.html")),(0,r.kt)("h3",{id:"grch38"},"GRCh38"),(0,r.kt)("p",null,"The data is not available for GRCh38 on GERP++ website, and was obtained from ",(0,r.kt)("a",{parentName:"p",href:"https://personal.broadinstitute.org/konradk/loftee_data/GRCh38/"},"https://personal.broadinstitute.org/konradk/loftee_data/GRCh38/")),(0,r.kt)("h2",{id:"json-output"},"JSON 
Output"),(0,r.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f32a6935.6499adde.js b/assets/js/f32a6935.6499adde.js deleted file mode 100644 index 3e13d0f5..00000000 --- a/assets/js/f32a6935.6499adde.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9383],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function s(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),l=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):s(s({},t),e)),n},p=function(e){var t=l(e.components);return r.createElement(i.Provider,{value:t},e.children)},u="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,p=c(e,["components","mdxType","originalType","parentName"]),u=l(n),m=a,f=u["".concat(i,".").concat(m)]||u[m]||d[m]||o;return n?r.createElement(f,s(s({ref:t},p),{},{components:n})):r.createElement(f,s({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,s=new Array(o);s[0]=m;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[u]="string"==typeof e?e:a,s[1]=c;for(var l=2;l{n.r(t),n.d(t,{contentTitle:()=>s,default:()=>u,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var 
r=n(87462),a=(n(67294),n(3905));const o={},s=void 0,c={unversionedId:"data-sources/dbsnp-json",id:"version-3.2.5/data-sources/dbsnp-json",title:"dbsnp-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/dbsnp-json.md",sourceDirName:"data-sources",slug:"/data-sources/dbsnp-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/dbsnp-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/dbsnp-json.md",tags:[],version:"3.2.5",frontMatter:{}},i=[],l={toc:i},p="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dbsnp":[\n "rs1042821"\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"dbsnp"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,a.kt)("td",{parentName:"tr",align:"left"},"dbSNP rsIDs")))))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f59caf00.04c5f494.js b/assets/js/f59caf00.04c5f494.js deleted file mode 100644 index 64ceefee..00000000 --- a/assets/js/f59caf00.04c5f494.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[9449],{3905:(_,E,t)=>{t.d(E,{Zo:()=>N,kt:()=>o});var e=t(67294);function A(_,E,t){return E in _?Object.defineProperty(_,E,{value:t,enumerable:!0,configurable:!0,writable:!0}):_[E]=t,_}function n(_,E){var t=Object.keys(_);if(Object.getOwnPropertySymbols){var 
e=Object.getOwnPropertySymbols(_);E&&(e=e.filter((function(E){return Object.getOwnPropertyDescriptor(_,E).enumerable}))),t.push.apply(t,e)}return t}function a(_){for(var E=1;E=0||(A[t]=_[t]);return A}(_,E);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(_);for(e=0;e=0||Object.prototype.propertyIsEnumerable.call(_,t)&&(A[t]=_[t])}return A}var M=e.createContext({}),l=function(_){var E=e.useContext(M),t=E;return _&&(t="function"==typeof _?_(E):a(a({},E),_)),t},N=function(_){var E=l(_.components);return e.createElement(M.Provider,{value:E},_.children)},R="mdxType",i={inlineCode:"code",wrapper:function(_){var E=_.children;return e.createElement(e.Fragment,{},E)}},F=e.forwardRef((function(_,E){var t=_.components,A=_.mdxType,n=_.originalType,M=_.parentName,N=r(_,["components","mdxType","originalType","parentName"]),R=l(t),F=A,o=R["".concat(M,".").concat(F)]||R[F]||i[F]||n;return t?e.createElement(o,a(a({ref:E},N),{},{components:t})):e.createElement(o,a({ref:E},N))}));function o(_,E){var t=arguments,A=E&&E.mdxType;if("string"==typeof _||A){var n=t.length,a=new Array(n);a[0]=F;var r={};for(var M in E)hasOwnProperty.call(E,M)&&(r[M]=E[M]);r.originalType=_,r[R]="string"==typeof _?_:A,a[1]=r;for(var l=2;l{t.r(E),t.d(E,{contentTitle:()=>a,default:()=>R,frontMatter:()=>n,metadata:()=>r,toc:()=>M});var e=t(87462),A=(t(67294),t(3905));const n={},a=void 0,r={unversionedId:"data-sources/gnomad-structural-variants-data_description",id:"version-3.21/data-sources/gnomad-structural-variants-data_description",title:"gnomad-structural-variants-data_description",description:"Bed 
Example",source:"@site/versioned_docs/version-3.21/data-sources/gnomad-structural-variants-data_description.md",sourceDirName:"data-sources",slug:"/data-sources/gnomad-structural-variants-data_description",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad-structural-variants-data_description",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gnomad-structural-variants-data_description.md",tags:[],version:"3.21",frontMatter:{}},M=[{value:"Bed Example",id:"bed-example",children:[],level:4},{value:"TSV Example",id:"tsv-example",children:[],level:4},{value:"Structural Variant Type Mapping",id:"structural-variant-type-mapping",children:[],level:4}],l={toc:M},N="wrapper";function R(_){let{components:E,...t}=_;return(0,A.kt)(N,(0,e.Z)({},l,t,{components:E,mdxType:"MDXLayout"}),(0,A.kt)("h4",{id:"bed-example"},"Bed Example"),(0,A.kt)("p",null,"The bed file was obtained from original source for GRCh37"),(0,A.kt)("pre",null,(0,A.kt)("code",{parentName:"pre",className:"language-scss"},"#chrom start end name svtype ALGORITHMS BOTHSIDES_SUPPORT CHR2 CPX_INTERVALS CPX_TYPE END2 ENDEVIDENCE HIGH_SR_BACKGROUND PCRPLUS_DEPLETED PESR_GT_OVERDISPERSION POS2 PROTEIN_CODING__COPY_GAIN PROTEIN_CODING__DUP_LOF PROTEIN_CODING__DUP_PARTIAL PROTEIN_CODING__INTERGENIC PROTEIN_CODING__INTRONIC PROTEIN_CODING__INV_SPAN PROTEIN_CODING__LOF PROTEIN_CODING__MSV_EXON_OVR PROTEIN_CODING__NEAREST_TSS PROTEIN_CODING__PROMOTER PROTEIN_CODING__UTR SOURCE STRANDS SVLEN SVTYPE UNRESOLVED_TYPE UNSTABLE_AF_PCRPLUS VARIABLE_ACROSS_BATCHES AN AC AF N_BI_GENOS N_HOMREF N_HET N_HOMALT FREQ_HOMREF FREQ_HET FREQ_HOMALT MALE_AN MALE_AC MALE_AF MALE_N_BI_GENOS MALE_N_HOMREF MALE_N_HET MALE_N_HOMALT MALE_FREQ_HOMREF MALE_FREQ_HET MALE_FREQ_HOMALT MALE_N_HEMIREF MALE_N_HEMIALT MALE_FREQ_HEMIREF MALE_FREQ_HEMIALT PAR FEMALE_AN FEMALE_AC FEMALE_AF FEMALE_N_BI_GENOS FEMALE_N_HOMREF FEMALE_N_HET FEMALE_N_HOMALT 
FEMALE_FREQ_HOMREF FEMALE_FREQ_HET FEMALE_FREQ_HOMALT POPMAX_AF AFR_AN AFR_AC AFR_AF AFR_N_BI_GENOS AFR_N_HOMREF AFR_N_HET AFR_N_HOMALT AFR_FREQ_HOMREF AFR_FREQ_HEAFR_FREQ_HOMALT AFR_MALE_AN AFR_MALE_AC AFR_MALE_AF AFR_MALE_N_BI_GENOS AFR_MALE_N_HOMREF AFR_MALE_N_HET AFR_MALE_N_HOMALT AFR_MALE_FREQ_HOMREF AFR_MALE_FREQ_HET AFR_MALE_FREQ_HOMALT AFR_MALE_N_HEMIREF AFR_MALE_N_HEMIALT AFR_MALE_FREQ_HEMIREF AFR_MALE_FREQ_HEMIALT AFR_FEMALE_AN AFR_FEMALE_AC AFR_FEMALE_AF AFR_FEMALE_N_BI_GENOS AFR_FEMALE_N_HOMREF AFR_FEMALE_N_HET AFR_FEMALE_N_HOMALT AFR_FEMALE_FREQ_HOMREF AFR_FEMALE_FREQ_HET AFR_FEMALE_FREQ_HOMALT AMR_AN AMR_AC AMR_AF AMR_N_BI_GENOS AMR_N_HOMREF AMR_N_HET AMR_N_HOMALT AMR_FREQ_HOMREF AMR_FREQ_HET AMR_FREQ_HOMALT AMR_MALE_AN AMR_MALE_AC AMR_MALE_AF AMR_MALE_N_BI_GENOS AMR_MALE_N_HOMREF AMR_MALE_N_HET AMR_MALE_N_HOMALT AMR_MALE_FREQ_HOMREF AMR_MALE_FREQ_HET AMR_MALE_FREQ_HOMALT AMR_MALE_N_HEMIREF AMR_MALE_N_HEMIALT AMR_MALE_FREQ_HEMIREF AMR_MALE_FREQ_HEMIALT AMR_FEMALE_AN AMR_FEMALE_AC AMR_FEMALE_AF AMR_FEMALE_N_BI_GENOS AMR_FEMALE_N_HOMREF AMR_FEMALE_N_HET AMR_FEMALE_N_HOMALT AMR_FEMALE_FREQ_HOMREF AMR_FEMALE_FREQ_HET AMR_FEMALE_FREQ_HOMALT EAS_AN EAS_AC EAS_AF EAS_N_BI_GENOS EAS_N_HOMREF EAS_N_HET EAS_N_HOMALT EAS_FREQ_HOMREF EAS_FREQ_HET EAS_FREQ_HOMALT EAS_MALE_AN EAS_MALE_AC EAS_MALE_AF EAS_MALE_N_BI_GENOS EAS_MALE_N_HOMREF EAS_MALE_N_HET EAS_MALE_N_HOMALT EAS_MALE_FREQ_HOMREF EAS_MALE_FREQ_HET EAS_MALE_FREQ_HOMALT EAS_MALE_N_HEMIREF EAS_MALE_N_HEMIALT EAS_MALE_FREQ_HEMIREF EAS_MALE_FREQ_HEMIALT EAS_FEMALE_AN EAS_FEMALE_AC EAS_FEMALE_AF EAS_FEMALE_N_BI_GENOS EAS_FEMALE_N_HOMREF EAS_FEMALE_N_HET EAS_FEMALE_N_HOMALT EAS_FEMALE_FREQ_HOMREF EAS_FEMALE_FREQ_HET EAS_FEMALE_FREQ_HOMALT EUR_AN EUR_AC EUR_AF EUR_N_BI_GENOS EUR_N_HOMREF EUR_N_HET EUR_N_HOMALT EUR_FREQ_HOMREF EUR_FREQ_HET EUR_FREQ_HOMALT EUR_MALE_AN EUR_MALE_AC EUR_MALE_AF EUR_MALE_N_BI_GENOS EUR_MALE_N_HOMREF EUR_MALE_N_HET EUR_MALE_N_HOMALT EUR_MALE_FREQ_HOMREF EUR_MALE_FREQ_HET 
EUR_MALE_FREQ_HOMALT EUR_MALE_N_HEMIREF EUR_MALE_N_HEMIALT EUR_MALE_FREQ_HEMIREF EUR_MALE_FREQ_HEMIALT EUR_FEMALE_AN EUR_FEMALE_AC EUR_FEMALE_AF EUR_FEMALE_N_BI_GENOS EUR_FEMALE_N_HOMREF EUR_FEMALE_N_HET EUR_FEMALE_N_HOMALT EUR_FEMALE_FREQ_HOMREF EUR_FEMALE_FREQ_HET EUR_FEMALE_FREQ_HOMALT OTH_AN OTH_AC OTH_AF OTH_N_BI_GENOS OTH_N_HOMREF OTH_N_HET OTH_N_HOMALT OTH_FREQ_HOMREF OTH_FREQ_HET OTH_FREQ_HOMALT OTH_MALE_AN OTH_MALE_AC OTH_MALE_AF OTH_MALE_N_BI_GENOS OTH_MALE_N_HOMREF OTH_MALE_N_HET OTH_MALE_N_HOMALT OTH_MALE_FREQ_HOMREF OTH_MALE_FREQ_HET OTH_MALE_FREQ_HOMALT OTH_MALE_N_HEMIREF OTH_MALE_N_HEMIALT OTH_MALE_FREQ_HEMIREF OTH_MALE_FREQ_HEMIALT OTH_FEMALE_AN OTH_FEMALE_AC OTH_FEMALE_AF OTH_FEMALE_N_BI_GENOS OTH_FEMALE_N_HOMREF OTH_FEMALE_N_HET OTH_FEMALE_N_HOMALT OTH_FEMALE_FREQ_HOMREF OTH_FEMALE_FREQ_HET OTH_FEMALE_FREQ_HOMALT FILTER\n1 10641 10642 gnomAD-SV_v2.1_BND_1_1 BND manta False 15 NA NA 10643 10643 PE,SR False False True 10642 NA NA NA False NA NA NA NA NA NA NA NA NA -1 BND SINGLE_ENDER_-- False False 21366 145 0.006785999983549118 10683 10543 135 5 0.9868950247764587 0.012636899948120117 0.00046803298755548894 10866 69 0.00634999992325902 5433 5366 65 2 0.987667977809906 0.011963900178670883 0.000368120992789045 NA NA NA NA False 10454 76 0.007269999943673615227 5154 70 3 0.9860339760780334 0.013392000459134579 0.0005739430198445916 0.015956999734044075 93972 0.007660999894142151 4699 4629 68 2 0.9851030111312866 0.014471200294792652 0.0004256220126990229 5154 33 0.006403000093996525 2577 2544 33 0 0.9871940016746521 0.012805599719285965 0.0NA NA NA NA 4232 39 0.009216000325977802 2116 2079 35 2 0.9825140237808228 0.01654059998691082 0.0009451800142414868 1910 7 0.003664999967440963 955 949 5 1 0.9937170147895813 0.00523559981957078 0.001047119963914156 950 4 0.004211000166833401 475 472 2 1 0.9936839938163757 0.00421052984893322 0.0021052600350230932 NA NA NA NA 952 3 0.0031510000117123127 476473 3 0 0.9936969876289368 0.006302520167082548 0.0 2296 
31 0.013501999899744987 1148 11131 0 0.9729970097541809 0.02700350061058998 0.0 1312 13 0.009909000247716904 656 643 13 0.9801830053329468 0.01981710083782673 0.0 NA NA NA NA 976 18 0.018442999571561813 488470 18 0 0.9631149768829346 0.03688519820570946 0.0 7574 32 0.004224999807775021 3787 37528 2 0.9920780062675476 0.007393720094114542 0.0005281229969114065 3374 17 0.005038999952375889 1681671 15 1 0.9905160069465637 0.008891520090401173 0.000592768017668277 NA NA NA NA 41815 0.003587000072002411 2091 2077 13 1 0.9933050274848938 0.006217120215296745 0.00047823999193497188 3 0.015956999734044075 94 91 3 0 0.968084990978241 0.03191490098834038 0.0 76 0.026316000148653984 38 36 2 0 0.9473680257797241 0.05263160169124603 0.0 NA NA NA NA 112 1 0.008929000236093998 56 55 1 0 0.982142984867096 0.017857100814580917 0.0UNRESOLVED \n")),(0,A.kt)("h4",{id:"tsv-example"},"TSV Example"),(0,A.kt)("p",null,"The tsv was obtained from lifted over dataset created by dbVar for GRCh38"),(0,A.kt)("pre",null,(0,A.kt)("code",{parentName:"pre",className:"language-scss"},"#variant_call_accession variant_call_id variant_call_type experiment_id sample_id sampleset_id assembly chrcontig outer_start start inner_start inner_stop stop outer_stop insertion_length variant_region_acc variant_region_id copy_number description validation zygosity origin phenotype hgvs_name placement_method placement_rank placements_per_assembly remap_alignment remap_best_within_cluster remap_coverage remap_diff_chr remap_failure_code allele_count allele_frequency allele_number\nnssv15777856 gnomAD-SV_v2.1_CNV_10_564_alt_1 copy number variation 1 1 GRCh38.p12 10 736806 738184 nsv4039284 10__782746___784124______GRCh37.p13_copy_number_variation 0 Remapped BestAvailable Single First Pass 0 1 AC=21,AFR_AC=10,AMR_AC=9,EAS_AC=0,EUR_AC=2,OTH_AC=0AF=0.038889,AFR_AF=0.044643,AMR_AF=0.03913,EAS_AF=0,EUR_AF=0.023256,OTH_AF=0 
AN=540,AFR_AN=224,AMR_AN=230,EAS_AN=0,EUR_AN=86,OTH_AN=0\n")),(0,A.kt)("h4",{id:"structural-variant-type-mapping"},"Structural Variant Type Mapping"),(0,A.kt)("p",null,"The source files represented the structural variants with keys using various naming conventions.\nIn the Nirvana JSON output, these keys will be mapped according to the following. "),(0,A.kt)("table",null,(0,A.kt)("thead",{parentName:"table"},(0,A.kt)("tr",{parentName:"thead"},(0,A.kt)("th",{parentName:"tr",align:null},"Nirvana JSON SV Type Key"),(0,A.kt)("th",{parentName:"tr",align:null},"GRCh37 Source SV Type Key"),(0,A.kt)("th",{parentName:"tr",align:null},"GRCh38 Source SV Type Key"))),(0,A.kt)("tbody",{parentName:"table"},(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"copy_number_variation"),(0,A.kt)("td",{parentName:"tr",align:null}),(0,A.kt)("td",{parentName:"tr",align:null},"copy number variation")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"deletion"),(0,A.kt)("td",{parentName:"tr",align:null},"DEL, CN=0"),(0,A.kt)("td",{parentName:"tr",align:null},"deletion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"duplication"),(0,A.kt)("td",{parentName:"tr",align:null},"DUP"),(0,A.kt)("td",{parentName:"tr",align:null},"duplication")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS"),(0,A.kt)("td",{parentName:"tr",align:null},"insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"inversion"),(0,A.kt)("td",{parentName:"tr",align:null},"INV"),(0,A.kt)("td",{parentName:"tr",align:null},"inversion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME"),(0,A.kt)("td",{parentName:"tr",align:null},"mobile element 
insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:ALU"),(0,A.kt)("td",{parentName:"tr",align:null},"alu insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:LINE1"),(0,A.kt)("td",{parentName:"tr",align:null},"line1 insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"mobile_element_insertion"),(0,A.kt)("td",{parentName:"tr",align:null},"INS:ME:SVA"),(0,A.kt)("td",{parentName:"tr",align:null},"sva insertion")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"structural alteration"),(0,A.kt)("td",{parentName:"tr",align:null}),(0,A.kt)("td",{parentName:"tr",align:null},"sequence alteration")),(0,A.kt)("tr",{parentName:"tbody"},(0,A.kt)("td",{parentName:"tr",align:null},"complex_structural_alteration"),(0,A.kt)("td",{parentName:"tr",align:null},"CPX"),(0,A.kt)("td",{parentName:"tr",align:null})))))}R.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f5e69b41.f236e1b2.js b/assets/js/f5e69b41.f236e1b2.js deleted file mode 100644 index fdf3f32b..00000000 --- a/assets/js/f5e69b41.f236e1b2.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8660],{3905:(t,e,n)=>{n.d(e,{Zo:()=>s,kt:()=>k});var a=n(67294);function r(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function l(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(t);e&&(a=a.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,a)}return n}function i(t){for(var e=1;e=0||(r[n]=t[n]);return r}(t,e);if(Object.getOwnPropertySymbols){var 
l=Object.getOwnPropertySymbols(t);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(r[n]=t[n])}return r}var o=a.createContext({}),m=function(t){var e=a.useContext(o),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},s=function(t){var e=m(t.components);return a.createElement(o.Provider,{value:e},t.children)},c="mdxType",d={inlineCode:"code",wrapper:function(t){var e=t.children;return a.createElement(a.Fragment,{},e)}},u=a.forwardRef((function(t,e){var n=t.components,r=t.mdxType,l=t.originalType,o=t.parentName,s=p(t,["components","mdxType","originalType","parentName"]),c=m(n),u=r,k=c["".concat(o,".").concat(u)]||c[u]||d[u]||l;return n?a.createElement(k,i(i({ref:e},s),{},{components:n})):a.createElement(k,i({ref:e},s))}));function k(t,e){var n=arguments,r=e&&e.mdxType;if("string"==typeof t||r){var l=n.length,i=new Array(l);i[0]=u;var p={};for(var o in e)hasOwnProperty.call(e,o)&&(p[o]=e[o]);p.originalType=t,p[c]="string"==typeof t?t:r,i[1]=p;for(var m=2;m{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>c,frontMatter:()=>l,metadata:()=>p,toc:()=>o});var a=n(87462),r=(n(67294),n(3905));const l={},i=void 0,p={unversionedId:"data-sources/clinvar-json",id:"version-3.2.5/data-sources/clinvar-json",title:"clinvar-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.2.5/data-sources/clinvar-json.md",sourceDirName:"data-sources",slug:"/data-sources/clinvar-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.2.5/data-sources/clinvar-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.2.5/data-sources/clinvar-json.md",tags:[],version:"3.2.5",frontMatter:{}},o=[],m={toc:o},s="wrapper";function c(t){let{components:e,...n}=t;return(0,r.kt)(s,(0,a.Z)({},m,n,{components:e,mdxType:"MDXLayout"}),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-json"},'"clinvar":[\n {\n "id":"RCV000030258.4",\n "reviewStatus":"reviewed by expert 
panel",\n "alleleOrigins":[\n "germline"\n ],\n "refAllele":"G",\n "altAllele":"A",\n "phenotypes":[\n "Lynch syndrome"\n ],\n "medGenIds":[\n "C1333990"\n ],\n "omimIds":[\n "120435"\n ],\n "significance":[\n "benign"\n ],\n "lastUpdatedDate":"2017-05-01",\n "isAlleleSpecific":true\n }\n]\n')),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,r.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,r.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"id"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"ClinVar ID")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"reviewStatus"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"alleleOrigins"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"refAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"altAllele"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"phenotypes"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string 
array"),(0,r.kt)("td",{parentName:"tr",align:"left"})),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"medGenIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"MedGen IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"omimIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"OMIM IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"orphanetIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"Orphanet IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"significance"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"see possible values below")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"lastUpdatedDate"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string"),(0,r.kt)("td",{parentName:"tr",align:"left"},"yyyy-MM-dd")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"pubMedIds"),(0,r.kt)("td",{parentName:"tr",align:"center"},"string array"),(0,r.kt)("td",{parentName:"tr",align:"left"},"PubMed IDs")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"left"},"isAlleleSpecific"),(0,r.kt)("td",{parentName:"tr",align:"center"},"bool"),(0,r.kt)("td",{parentName:"tr",align:"left"},"true when the current variant alternate allele matches the ClinVar alternate allele")))),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"reviewStatus:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"no assertion provided"),(0,r.kt)("li",{parentName:"ul"},"no assertion criteria provided"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, single submitter"),(0,r.kt)("li",{parentName:"ul"},"practice 
guideline"),(0,r.kt)("li",{parentName:"ul"},"classified by multiple submitters"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, conflicting interpretations"),(0,r.kt)("li",{parentName:"ul"},"criteria provided, multiple submitters, no conflicts"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"alleleOrigins:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"unknown"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"germline"),(0,r.kt)("li",{parentName:"ul"},"somatic"),(0,r.kt)("li",{parentName:"ul"},"inherited"),(0,r.kt)("li",{parentName:"ul"},"paternal"),(0,r.kt)("li",{parentName:"ul"},"maternal"),(0,r.kt)("li",{parentName:"ul"},"de-novo"),(0,r.kt)("li",{parentName:"ul"},"biparental"),(0,r.kt)("li",{parentName:"ul"},"uniparental"),(0,r.kt)("li",{parentName:"ul"},"not-tested"),(0,r.kt)("li",{parentName:"ul"},"tested-inconclusive")),(0,r.kt)("p",null,(0,r.kt)("strong",{parentName:"p"},"significance:")),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},"uncertain significance"),(0,r.kt)("li",{parentName:"ul"},"not provided"),(0,r.kt)("li",{parentName:"ul"},"benign"),(0,r.kt)("li",{parentName:"ul"},"likely benign"),(0,r.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,r.kt)("li",{parentName:"ul"},"pathogenic"),(0,r.kt)("li",{parentName:"ul"},"drug response"),(0,r.kt)("li",{parentName:"ul"},"histocompatibility"),(0,r.kt)("li",{parentName:"ul"},"association"),(0,r.kt)("li",{parentName:"ul"},"risk factor"),(0,r.kt)("li",{parentName:"ul"},"protective"),(0,r.kt)("li",{parentName:"ul"},"affects"),(0,r.kt)("li",{parentName:"ul"},"conflicting data from submitters"),(0,r.kt)("li",{parentName:"ul"},"other"),(0,r.kt)("li",{parentName:"ul"},"no interpretation for the single variant"),(0,r.kt)("li",{parentName:"ul"},"conflicting interpretations of pathogenicity")))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f6b8bff6.b0d4551b.js 
b/assets/js/f6b8bff6.b0d4551b.js deleted file mode 100644 index 0bd0c8bb..00000000 --- a/assets/js/f6b8bff6.b0d4551b.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[7816,448],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>g});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var s=r.createContext({}),c=function(e){var t=r.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},d=function(e){var t=c(e.components);return r.createElement(s.Provider,{value:t},e.children)},u="mdxType",p={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,d=l(e,["components","mdxType","originalType","parentName"]),u=c(n),m=a,g=u["".concat(s,".").concat(m)]||u[m]||p[m]||o;return n?r.createElement(g,i(i({ref:t},d),{},{components:n})):r.createElement(g,i({ref:t},d))}));function g(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=m;var l={};for(var s in t)hasOwnProperty.call(t,s)&&(l[s]=t[s]);l.originalType=e,l[u]="string"==typeof e?e:a,i[1]=l;for(var c=2;c{n.r(t),n.d(t,{contentTitle:()=>i,default:()=>u,frontMatter:()=>o,metadata:()=>l,toc:()=>s});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 
0,l={unversionedId:"data-sources/gme-json",id:"version-3.21/data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gme-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],c={toc:s},d="wrapper";function u(e){let{components:t,...n}=e;return(0,a.kt)(d,(0,r.Z)({},c,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n "failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any 
filters")))))}u.isMDXComponent=!0},11767:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>i,metadata:()=>s,toc:()=>c});var r=n(87462),a=(n(67294),n(3905)),o=n(42301);const i={title:"GME Variome"},l=void 0,s={unversionedId:"data-sources/gme",id:"version-3.21/data-sources/gme",title:"GME Variome",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/gme.mdx",sourceDirName:"data-sources",slug:"/data-sources/gme",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gme",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gme.mdx",tags:[],version:"3.21",frontMatter:{title:"GME Variome"},sidebar:"docs",previous:{title:"GERP",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gerp"},next:{title:"gnomAD",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gnomad"}},c=[{value:"Overview",id:"overview",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"GRCh37 liftover",id:"grch37-liftover",children:[],level:2},{value:"Download URL",id:"download-url",children:[],level:2},{value:"JSON output",id:"json-output",children:[],level:2}],d={toc:c},u="wrapper";function p(e){let{components:t,...n}=e;return(0,a.kt)(u,(0,r.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("h2",{id:"overview"},"Overview"),(0,a.kt)("p",null,"The ",(0,a.kt)("a",{parentName:"p",href:"http://igm.ucsd.edu/gme/index.php"},"Greater Middle East (GME) Variome")," Project is aimed at generating a coding base reference for the countries found in the Greater Middle East. 
Nirvana presents variant frequencies for the Greater Middle Eastern population."),(0,a.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,a.kt)("div",{parentName:"div",className:"admonition-heading"},(0,a.kt)("h5",{parentName:"div"},(0,a.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,a.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,a.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,a.kt)("div",{parentName:"div",className:"admonition-content"},(0,a.kt)("p",{parentName:"div"},"Scott, E. M., Halees, A., Itan, Y., Spencer, E. G., He, Y., Azab, M. A., Gabriel, S. B., Belkadi, A., Boisson, B., Abel, L., Clark, A. G., Greater Middle East Variome Consortium, Alkuraya, F. S., Casanova, J. L., & Gleeson, J. G. (2016). Characterization of Greater Middle Eastern genetic variation for enhanced disease gene discovery. ",(0,a.kt)("em",{parentName:"p"},"Nature genetics"),", 48(9), 1071\u20131076. 
",(0,a.kt)("a",{parentName:"p",href:"https://doi.org/10.1038/ng.3592"},"https://doi.org/10.1038/ng.3592")))),(0,a.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-scss"},"chrom pos ref alt AA filter FunctionGVS geneFunction Gene GeneID SIFT_pred GERP++ AF GME_GC GME_AC GME_AF NWA NEA AP Israel SD TP CA FunctionGVS_new Priority Polyphen2_HVAR_pred LRT_pred MutationTaster_pred rsid OMIM_MIM OMIM_Disease AA_AC EA_AC rsid_link position_link\n1 69134 A G A VQSRTrancheSNP99.90to100.00 nonsynonymous_SNV exonic OR4F5 79501 T 2.31 96:0:5 10,192 0.04950495049504951 4:0:0 59:0:2 12:0:0 0:0:0 6:0:0 9:0:2 13:0:2 nonsynonymous_SNV MODERATE B N N none - - none none - http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69134-69133\n1 69270 A G A PASS synonymous_SNV exonic OR4F5 79501 . . 93:38:240 518,224 0.6981132075471698 5:5:11 63:30:86 12:5:28 1:0:2 2:2:18 7:3:46 7:2:52 synonymous_SNV LOW . . . rs201219564 - - none none http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs201219564 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69270-69269\n1 69428 T G T PASS nonsynonymous_SNV exonic OR4F5 79501 D 0.891 676:44:15 74,1396 0.050340136054421766 43:0:2 313:16:10 88:7:3 6:0:0 44:8:0 102:9:0 102:4:2 nonsynonymous_SNV MODERATE D N N rs140739101 - - 14,3808 313,6535 http://www.ncbi.nlm.nih.gov/projects/SNP/snp_ref.cgi?searchType=adhoc_search&type=rs&rs=rs140739101 http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&org=human&position=chr1%3A69428-69427\n")),(0,a.kt)("h4",{id:"parsing"},"Parsing"),(0,a.kt)("p",null,"We parse the GME tsv file and extract the following 
columns:"),(0,a.kt)("ul",null,(0,a.kt)("li",{parentName:"ul"},"chrom"),(0,a.kt)("li",{parentName:"ul"},"pos"),(0,a.kt)("li",{parentName:"ul"},"ref"),(0,a.kt)("li",{parentName:"ul"},"alt"),(0,a.kt)("li",{parentName:"ul"},"filter"),(0,a.kt)("li",{parentName:"ul"},"GME_AC"),(0,a.kt)("li",{parentName:"ul"},"GME_AF")),(0,a.kt)("h2",{id:"grch37-liftover"},"GRCh37 liftover"),(0,a.kt)("p",null,"The data is not available for GRCh38 on GME website. We performed a liftover from GRCh37 to GRCh38 using CrossMap."),(0,a.kt)("h2",{id:"download-url"},"Download URL"),(0,a.kt)("p",null,(0,a.kt)("a",{parentName:"p",href:"http://igm.ucsd.edu/gme/download.shtml"},"http://igm.ucsd.edu/gme/download.shtml")),(0,a.kt)("h2",{id:"json-output"},"JSON output"),(0,a.kt)(o.default,{mdxType:"JSON"}))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f7e8c160.3fec7173.js b/assets/js/f7e8c160.3fec7173.js deleted file mode 100644 index af9c8852..00000000 --- a/assets/js/f7e8c160.3fec7173.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[700],{3905:(t,e,n)=>{n.d(e,{Zo:()=>d,kt:()=>k});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),c=function(t){var e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},d=function(t){var e=c(t.components);return 
r.createElement(p.Provider,{value:e},t.children)},m="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},s=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,i=t.originalType,p=t.parentName,d=l(t,["components","mdxType","originalType","parentName"]),m=c(n),s=a,k=m["".concat(p,".").concat(s)]||m[s]||u[s]||i;return n?r.createElement(k,o(o({ref:e},d),{},{components:n})):r.createElement(k,o({ref:e},d))}));function k(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var i=n.length,o=new Array(i);o[0]=s;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[m]="string"==typeof t?t:a,o[1]=l;for(var c=2;c{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var r=n(87462),a=(n(67294),n(3905));const i={title:"Dependencies"},o=void 0,l={unversionedId:"introduction/dependencies",id:"introduction/dependencies",title:"Dependencies",description:"All of the following dependencies have been included in this repository.",source:"@site/docs/introduction/dependencies.md",sourceDirName:"introduction",slug:"/introduction/dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/introduction/dependencies.md",tags:[],version:"current",frontMatter:{title:"Dependencies"},sidebar:"docs",previous:{title:"Introduction",permalink:"/IlluminaConnectedAnnotationsDocumentation/"},next:{title:"Getting Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started"}},p=[],c={toc:p},d="wrapper";function m(t){let{components:e,...n}=t;return(0,a.kt)(d,(0,r.Z)({},c,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("p",null,"All of the following dependencies have been included in this 
repository."),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Name"),(0,a.kt)("th",{parentName:"tr",align:"center"},"License"),(0,a.kt)("th",{parentName:"tr",align:null},"Usage"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-extensions-for-dotnet-cli"},"Amazon.Lambda")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS extensions for .NET CLI")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-sdk-net/"},"AWSSDK")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS Lambda, S3, SNS support")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://www.newtonsoft.com/json"},"Json.NET")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"JASIX utility")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/ebiggers/libdeflate"},"libdeflate")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/moq/moq4"},"Moq")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"Mocking framework for unit 
tests")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"http://www.ndesk.org/Options"},"NDesk.Options")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT/X11"),(0,a.kt)("td",{parentName:"tr",align:null},"CommandLine library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/xunit/xunit"},"xUnit")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"Unit testing framework")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/Dead2/zlib-ng"},"zlib-ng")),(0,a.kt)("td",{parentName:"tr",align:"center"},"zlib"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/facebook/zstd"},"zstd")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f7e8c160.96218d4e.js b/assets/js/f7e8c160.96218d4e.js new file mode 100644 index 00000000..d5591064 --- /dev/null +++ b/assets/js/f7e8c160.96218d4e.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[700],{3905:(t,e,n)=>{n.d(e,{Zo:()=>d,kt:()=>k});var r=n(7294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function i(t,e){var n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function o(t){for(var e=1;e=0||(a[n]=t[n]);return 
a}(t,e);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var p=r.createContext({}),c=function(t){var e=r.useContext(p),n=e;return t&&(n="function"==typeof t?t(e):o(o({},e),t)),n},d=function(t){var e=c(t.components);return r.createElement(p.Provider,{value:e},t.children)},m="mdxType",u={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},s=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,i=t.originalType,p=t.parentName,d=l(t,["components","mdxType","originalType","parentName"]),m=c(n),s=a,k=m["".concat(p,".").concat(s)]||m[s]||u[s]||i;return n?r.createElement(k,o(o({ref:e},d),{},{components:n})):r.createElement(k,o({ref:e},d))}));function k(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var i=n.length,o=new Array(i);o[0]=s;var l={};for(var p in e)hasOwnProperty.call(e,p)&&(l[p]=e[p]);l.originalType=t,l[m]="string"==typeof t?t:a,o[1]=l;for(var c=2;c{n.r(e),n.d(e,{contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>l,toc:()=>p});var r=n(7462),a=(n(7294),n(3905));const i={title:"Dependencies"},o=void 0,l={unversionedId:"introduction/dependencies",id:"introduction/dependencies",title:"Dependencies",description:"All of the following dependencies have been included in this repository.",source:"@site/docs/introduction/dependencies.md",sourceDirName:"introduction",slug:"/introduction/dependencies",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/introduction/dependencies.md",tags:[],version:"current",frontMatter:{title:"Dependencies"},sidebar:"docs",previous:{title:"Introduction",permalink:"/IlluminaConnectedAnnotationsDocumentation/"},next:{title:"Getting 
Started",permalink:"/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started"}},p=[],c={toc:p},d="wrapper";function m(t){let{components:e,...n}=t;return(0,a.kt)(d,(0,r.Z)({},c,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("p",null,"All of the following dependencies have been included in this repository."),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Name"),(0,a.kt)("th",{parentName:"tr",align:"center"},"License"),(0,a.kt)("th",{parentName:"tr",align:null},"Usage"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-extensions-for-dotnet-cli"},"Amazon.Lambda")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS extensions for .NET CLI")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/aws/aws-sdk-net/"},"AWSSDK")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"AWS Lambda, S3, SNS support")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://www.newtonsoft.com/json"},"Json.NET")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"JASIX utility")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/ebiggers/libdeflate"},"libdeflate")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression 
library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/moq/moq4"},"Moq")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"Mocking framework for unit tests")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"http://www.ndesk.org/Options"},"NDesk.Options")),(0,a.kt)("td",{parentName:"tr",align:"center"},"MIT/X11"),(0,a.kt)("td",{parentName:"tr",align:null},"CommandLine library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/xunit/xunit"},"xUnit")),(0,a.kt)("td",{parentName:"tr",align:"center"},"Apache"),(0,a.kt)("td",{parentName:"tr",align:null},"Unit testing framework")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/Dead2/zlib-ng"},"zlib-ng")),(0,a.kt)("td",{parentName:"tr",align:"center"},"zlib"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},(0,a.kt)("a",{parentName:"td",href:"https://github.com/facebook/zstd"},"zstd")),(0,a.kt)("td",{parentName:"tr",align:"center"},"BSD"),(0,a.kt)("td",{parentName:"tr",align:null},"BlockCompression library")))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/f831acf2.76589949.js b/assets/js/f831acf2.76589949.js deleted file mode 100644 index 0dd4f970..00000000 --- a/assets/js/f831acf2.76589949.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[1212],{3905:(t,e,n)=>{n.d(e,{Zo:()=>s,kt:()=>f});var r=n(67294);function a(t,e,n){return e in t?Object.defineProperty(t,e,{value:n,enumerable:!0,configurable:!0,writable:!0}):t[e]=n,t}function o(t,e){var 
n=Object.keys(t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(t);e&&(r=r.filter((function(e){return Object.getOwnPropertyDescriptor(t,e).enumerable}))),n.push.apply(n,r)}return n}function i(t){for(var e=1;e=0||(a[n]=t[n]);return a}(t,e);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(t);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(t,n)&&(a[n]=t[n])}return a}var l=r.createContext({}),p=function(t){var e=r.useContext(l),n=e;return t&&(n="function"==typeof t?t(e):i(i({},e),t)),n},s=function(t){var e=p(t.components);return r.createElement(l.Provider,{value:e},t.children)},d="mdxType",m={inlineCode:"code",wrapper:function(t){var e=t.children;return r.createElement(r.Fragment,{},e)}},u=r.forwardRef((function(t,e){var n=t.components,a=t.mdxType,o=t.originalType,l=t.parentName,s=c(t,["components","mdxType","originalType","parentName"]),d=p(n),u=a,f=d["".concat(l,".").concat(u)]||d[u]||m[u]||o;return n?r.createElement(f,i(i({ref:e},s),{},{components:n})):r.createElement(f,i({ref:e},s))}));function f(t,e){var n=arguments,a=e&&e.mdxType;if("string"==typeof t||a){var o=n.length,i=new Array(o);i[0]=u;var c={};for(var l in e)hasOwnProperty.call(e,l)&&(c[l]=e[l]);c.originalType=t,c[d]="string"==typeof t?t:a,i[1]=c;for(var p=2;p{n.r(e),n.d(e,{contentTitle:()=>i,default:()=>d,frontMatter:()=>o,metadata:()=>c,toc:()=>l});var r=n(87462),a=(n(67294),n(3905));const o={},i=void 0,c={unversionedId:"data-sources/splice-ai-json",id:"version-3.16/data-sources/splice-ai-json",title:"splice-ai-json",description:"| Field | Type | Notes 
|",source:"@site/versioned_docs/version-3.16/data-sources/splice-ai-json.md",sourceDirName:"data-sources",slug:"/data-sources/splice-ai-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.16/data-sources/splice-ai-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.16/data-sources/splice-ai-json.md",tags:[],version:"3.16",frontMatter:{}},l=[],p={toc:l},s="wrapper";function d(t){let{components:e,...n}=t;return(0,a.kt)(s,(0,r.Z)({},p,n,{components:e,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"spliceAI":[ \n {\n "hgnc":"BLCAP",\n "acceptorGainDistance":-3,\n "acceptorGainScore":0.3,\n "donorLossDistance":7,\n "donorLossScore":0.9\n },\n { \n "hgnc":"NNAT",\n "acceptorGainDistance":-1,\n "acceptorGainScore":0.2,\n "donorGainDistance":-2,\n "donorGainScore":0.3\n }\n]\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:"left"},"Field"),(0,a.kt)("th",{parentName:"tr",align:"center"},"Type"),(0,a.kt)("th",{parentName:"tr",align:"left"},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"hgnc"),(0,a.kt)("td",{parentName:"tr",align:"center"},"string"),(0,a.kt)("td",{parentName:"tr",align:"left"},"HGNC gene symbol")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"acceptorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorGainScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 1 decimal place")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossDistance"),(0,a.kt)("td",{parentName:"tr",align:"center"},"int"),(0,a.kt)("td",{parentName:"tr",align:"left"},"\xb1 bp from current position")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:"left"},"donorLossScore"),(0,a.kt)("td",{parentName:"tr",align:"center"},"float"),(0,a.kt)("td",{parentName:"tr",align:"left"},"range: 0 - 1.0. 
1 decimal place")))))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/e6ae9f90.3a66bb01.js b/assets/js/f98a4229.51629546.js similarity index 72% rename from assets/js/e6ae9f90.3a66bb01.js rename to assets/js/f98a4229.51629546.js index 1bb66c03..7f01303c 100644 --- a/assets/js/e6ae9f90.3a66bb01.js +++ b/assets/js/f98a4229.51629546.js @@ -1 +1 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[448],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var c=r.createContext({}),u=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=u(e.components);return r.createElement(c.Provider,{value:t},e.children)},s="mdxType",m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,l=e.originalType,c=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),s=u(n),d=a,f=s["".concat(c,".").concat(d)]||s[d]||m[d]||l;return n?r.createElement(f,o(o({ref:t},p),{},{components:n})):r.createElement(f,o({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var l=n.length,o=new Array(l);o[0]=d;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[s]="string"==typeof e?e:a,o[1]=i;for(var 
u=2;u{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>s,frontMatter:()=>l,metadata:()=>i,toc:()=>c});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/gme-json",id:"version-3.21/data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/gme-json.md",tags:[],version:"3.21",frontMatter:{}},c=[],u={toc:c},p="wrapper";function s(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n "failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele 
frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}s.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8633],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(7294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var c=r.createContext({}),u=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=u(e.components);return r.createElement(c.Provider,{value:t},e.children)},m="mdxType",s={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,l=e.originalType,c=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),m=u(n),d=a,f=m["".concat(c,".").concat(d)]||m[d]||s[d]||l;return n?r.createElement(f,o(o({ref:t},p),{},{components:n})):r.createElement(f,o({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var l=n.length,o=new Array(l);o[0]=d;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[m]="string"==typeof e?e:a,o[1]=i;for(var u=2;u{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>l,metadata:()=>i,toc:()=>c});var 
r=n(7462),a=(n(7294),n(3905));const l={},o=void 0,i={unversionedId:"data-sources/gme-json",id:"data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gme-json.md",tags:[],version:"current",frontMatter:{}},c=[],u={toc:c},p="wrapper";function m(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n "failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}m.isMDXComponent=!0}}]); \ No 
newline at end of file diff --git a/assets/js/f98a4229.7a4446e8.js b/assets/js/f98a4229.7a4446e8.js deleted file mode 100644 index f433d7e7..00000000 --- a/assets/js/f98a4229.7a4446e8.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[8633],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function o(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var c=r.createContext({}),u=function(e){var t=r.useContext(c),n=t;return e&&(n="function"==typeof e?e(t):o(o({},t),e)),n},p=function(e){var t=u(e.components);return r.createElement(c.Provider,{value:t},e.children)},m="mdxType",s={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,l=e.originalType,c=e.parentName,p=i(e,["components","mdxType","originalType","parentName"]),m=u(n),d=a,f=m["".concat(c,".").concat(d)]||m[d]||s[d]||l;return n?r.createElement(f,o(o({ref:t},p),{},{components:n})):r.createElement(f,o({ref:t},p))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var l=n.length,o=new Array(l);o[0]=d;var i={};for(var c in t)hasOwnProperty.call(t,c)&&(i[c]=t[c]);i.originalType=e,i[m]="string"==typeof e?e:a,o[1]=i;for(var u=2;u{n.r(t),n.d(t,{contentTitle:()=>o,default:()=>m,frontMatter:()=>l,metadata:()=>i,toc:()=>c});var r=n(87462),a=(n(67294),n(3905));const l={},o=void 
0,i={unversionedId:"data-sources/gme-json",id:"data-sources/gme-json",title:"gme-json",description:"| Field | Type | Notes |",source:"@site/docs/data-sources/gme-json.md",sourceDirName:"data-sources",slug:"/data-sources/gme-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/docs/data-sources/gme-json.md",tags:[],version:"current",frontMatter:{}},c=[],u={toc:c},p="wrapper";function m(e){let{components:t,...n}=e;return(0,a.kt)(p,(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"gmeVariome":{\n "allAc":10,\n "allAn":202,\n "allAf":0.049504,\n "failedFilter":true\n}\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAc"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele count")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAn"),(0,a.kt)("td",{parentName:"tr",align:null},"int"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele number")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"allAf"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"GME allele frequency")),(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"failedFilter"),(0,a.kt)("td",{parentName:"tr",align:null},"bool"),(0,a.kt)("td",{parentName:"tr",align:null},"True if this variant failed any filters")))))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git 
a/assets/js/fba7caeb.4137f0e5.js b/assets/js/fba7caeb.4137f0e5.js deleted file mode 100644 index f57e3a1c..00000000 --- a/assets/js/fba7caeb.4137f0e5.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[4226],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>f});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function l(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var i=r.createContext({}),s=function(e){var t=r.useContext(i),n=t;return e&&(n="function"==typeof e?e(t):l(l({},t),e)),n},u=function(e){var t=s(e.components);return r.createElement(i.Provider,{value:t},e.children)},p="mdxType",d={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},m=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,i=e.parentName,u=c(e,["components","mdxType","originalType","parentName"]),p=s(n),m=a,f=p["".concat(i,".").concat(m)]||p[m]||d[m]||o;return n?r.createElement(f,l(l({ref:t},u),{},{components:n})):r.createElement(f,l({ref:t},u))}));function f(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,l=new Array(o);l[0]=m;var c={};for(var i in t)hasOwnProperty.call(t,i)&&(c[i]=t[i]);c.originalType=e,c[p]="string"==typeof e?e:a,l[1]=c;for(var s=2;s{n.r(t),n.d(t,{contentTitle:()=>l,default:()=>p,frontMatter:()=>o,metadata:()=>c,toc:()=>i});var r=n(87462),a=(n(67294),n(3905));const o={},l=void 
0,c={unversionedId:"data-sources/dann-json",id:"version-3.18/data-sources/dann-json",title:"dann-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.18/data-sources/dann-json.md",sourceDirName:"data-sources",slug:"/data-sources/dann-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.18/data-sources/dann-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.18/data-sources/dann-json.md",tags:[],version:"3.18",frontMatter:{}},i=[],s={toc:i},u="wrapper";function p(e){let{components:t,...n}=e;return(0,a.kt)(u,(0,r.Z)({},s,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-json"},'"dannScore": 0.27\n')),(0,a.kt)("table",null,(0,a.kt)("thead",{parentName:"table"},(0,a.kt)("tr",{parentName:"thead"},(0,a.kt)("th",{parentName:"tr",align:null},"Field"),(0,a.kt)("th",{parentName:"tr",align:null},"Type"),(0,a.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,a.kt)("tbody",{parentName:"table"},(0,a.kt)("tr",{parentName:"tbody"},(0,a.kt)("td",{parentName:"tr",align:null},"dannScore"),(0,a.kt)("td",{parentName:"tr",align:null},"float"),(0,a.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1.0")))))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/ff2b4987.2aa87f68.js b/assets/js/ff2b4987.2aa87f68.js deleted file mode 100644 index 587020f5..00000000 --- a/assets/js/ff2b4987.2aa87f68.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[2552,140,3057,7366],{3905:(e,t,n)=>{n.d(t,{Zo:()=>p,kt:()=>g});var a=n(67294);function i(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function l(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return 
Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function r(e){for(var t=1;t=0||(i[n]=e[n]);return i}(e,t);if(Object.getOwnPropertySymbols){var l=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(i[n]=e[n])}return i}var s=a.createContext({}),d=function(e){var t=a.useContext(s),n=t;return e&&(n="function"==typeof e?e(t):r(r({},t),e)),n},p=function(e){var t=d(e.components);return a.createElement(s.Provider,{value:t},e.children)},c="mdxType",u={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},m=a.forwardRef((function(e,t){var n=e.components,i=e.mdxType,l=e.originalType,s=e.parentName,p=o(e,["components","mdxType","originalType","parentName"]),c=d(n),m=i,g=c["".concat(s,".").concat(m)]||c[m]||u[m]||l;return n?a.createElement(g,r(r({ref:t},p),{},{components:n})):a.createElement(g,r({ref:t},p))}));function g(e,t){var n=arguments,i=t&&t.mdxType;if("string"==typeof e||i){var l=n.length,r=new Array(l);r[0]=m;var o={};for(var s in t)hasOwnProperty.call(t,s)&&(o[s]=t[s]);o.originalType=e,o[c]="string"==typeof e?e:i,r[1]=o;for(var d=2;d{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-dosage-json",id:"version-3.21/data-sources/clingen-dosage-json",title:"clingen-dosage-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/clingen-dosage-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-dosage-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen-dosage-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clingen-dosage-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],d={toc:s},p="wrapper";function 
c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenDosageSensitivityMap": [{\n "chromosome": "15",\n "begin": 30900686,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "little evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 0.33994\n},\n{\n "chromosome": "15",\n "begin": 31727418,\n "end": 32153204,\n "haploinsufficiency": "sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype",\n "triplosensitivity": "dosage sensitivity unlikely",\n "reciprocalOverlap": 0.00147,\n "annotationOverlap": 1\n}]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenDosageSensitivityMap"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based 
position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"haploinsufficiency"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"triplosensitivity"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"(same as haploinsufficiency)\xa0")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. Specified up to 5 decimal places (Not reported for Insertions).")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"annotationOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"haploinsufficiency and triplosensitivity")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no evidence to suggest that dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"little evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"emerging evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"sufficient evidence suggesting dosage sensitivity is associated with clinical phenotype"),(0,i.kt)("li",{parentName:"ul"},"gene associated with autosomal recessive phenotype"),(0,i.kt)("li",{parentName:"ul"},"dosage sensitivity unlikely")))}c.isMDXComponent=!0},73791:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-gene-validity-json",id:"version-3.21/data-sources/clingen-gene-validity-json",title:"clingen-gene-validity-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/clingen-gene-validity-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-gene-validity-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen-gene-validity-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clingen-gene-validity-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingenGeneValidity":[\n {\n "diseaseId":"MONDO_0007893",\n "disease":"Noonan syndrome with multiple lentigines",\n 
"classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n },\n {\n "diseaseId":"MONDO_0015280",\n "disease":"cardiofaciocutaneous syndrome",\n "classification":"no reported evidence",\n "classificationDate":"2018-06-07"\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingenGeneValidity"),(0,i.kt)("td",{parentName:"tr",align:null},"object"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"diseaseId"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Monarch Disease Ontology ID (MONDO)")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"disease"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"disease label")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classification"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see below for possible values")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"classificationDate"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"yyyy-MM-dd")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"classification")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"no reported 
evidence"),(0,i.kt)("li",{parentName:"ul"},"disputed"),(0,i.kt)("li",{parentName:"ul"},"limited"),(0,i.kt)("li",{parentName:"ul"},"moderate"),(0,i.kt)("li",{parentName:"ul"},"definitive"),(0,i.kt)("li",{parentName:"ul"},"strong"),(0,i.kt)("li",{parentName:"ul"},"refuted"),(0,i.kt)("li",{parentName:"ul"},"no known disease relationship")))}c.isMDXComponent=!0},76541:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>r,default:()=>c,frontMatter:()=>l,metadata:()=>o,toc:()=>s});var a=n(87462),i=(n(67294),n(3905));const l={},r=void 0,o={unversionedId:"data-sources/clingen-json",id:"version-3.21/data-sources/clingen-json",title:"clingen-json",description:"| Field | Type | Notes |",source:"@site/versioned_docs/version-3.21/data-sources/clingen-json.md",sourceDirName:"data-sources",slug:"/data-sources/clingen-json",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen-json",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clingen-json.md",tags:[],version:"3.21",frontMatter:{}},s=[],d={toc:s},p="wrapper";function c(e){let{components:t,...n}=e;return(0,i.kt)(p,(0,a.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-json"},'"clingen":[\n {\n "chromosome":"17",\n "begin":525,\n "end":14667519,\n "variantType":"copy_number_gain",\n "id":"nsv996083",\n "clinicalInterpretation":"pathogenic",\n "observedGains":1,\n "validated":true,\n "phenotypes":[\n "Intrauterine growth retardation"\n ],\n "phenotypeIds":[\n "HP:0001511",\n "MedGen:C1853481"\n ],\n "reciprocalOverlap":0.00131\n },\n {\n "chromosome":"17",\n "begin":45835,\n "end":7600330,\n "variantType":"copy_number_loss",\n "id":"nsv869419",\n "clinicalInterpretation":"pathogenic",\n "observedLosses":1,\n "validated":true,\n "phenotypes":[\n "Developmental delay AND/OR other significant developmental or morphological phenotypes"\n ],\n 
"reciprocalOverlap":0.00254\n }\n]\n')),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Field"),(0,i.kt)("th",{parentName:"tr",align:null},"Type"),(0,i.kt)("th",{parentName:"tr",align:null},"Notes"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clingen"),(0,i.kt)("td",{parentName:"tr",align:null},"object array"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"chromosome"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Ensembl-style chromosome names")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"begin"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"end"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"1-based position")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"variantType"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Any of the\xa0sequence alterations defined here.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"id"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"Identifier from the data source. 
Alternatively a VID")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"clinicalInterpretation"),(0,i.kt)("td",{parentName:"tr",align:null},"string"),(0,i.kt)("td",{parentName:"tr",align:null},"see possible values below")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedGains"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"observedLosses"),(0,i.kt)("td",{parentName:"tr",align:null},"integer"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - (2",(0,i.kt)("sup",null,"31"),"\xa0- 1). Only used if copy_number_variation, copy_number_loss, or copy_number_gain.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"validated"),(0,i.kt)("td",{parentName:"tr",align:null},"boolean"),(0,i.kt)("td",{parentName:"tr",align:null})),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypes"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"phenotypeIds"),(0,i.kt)("td",{parentName:"tr",align:null},"string array"),(0,i.kt)("td",{parentName:"tr",align:null},"Description of the phenotype IDs.")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"reciprocalOverlap"),(0,i.kt)("td",{parentName:"tr",align:null},"floating point"),(0,i.kt)("td",{parentName:"tr",align:null},"Range: 0 - 1. E.g. 0.57 would indicate a 57% reciprocal overlap. 
Specified up to 5 decimal places (Not reported for Insertions).")))),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"clinicalInterpretation")),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")))}c.isMDXComponent=!0},73490:(e,t,n)=>{n.r(t),n.d(t,{contentTitle:()=>d,default:()=>g,frontMatter:()=>s,metadata:()=>p,toc:()=>c});var a=n(87462),i=(n(67294),n(3905)),l=n(76541),r=n(33826),o=n(73791);const s={title:"ClinGen"},d=void 0,p={unversionedId:"data-sources/clingen",id:"version-3.21/data-sources/clingen",title:"ClinGen",description:"Overview",source:"@site/versioned_docs/version-3.21/data-sources/clingen.mdx",sourceDirName:"data-sources",slug:"/data-sources/clingen",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clingen",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/versioned_docs/version-3.21/data-sources/clingen.mdx",tags:[],version:"3.21",frontMatter:{title:"ClinGen"},sidebar:"docs",previous:{title:"Cancer Hotspots",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/cancer-hotspots"},next:{title:"ClinVar",permalink:"/IlluminaConnectedAnnotationsDocumentation/3.21/data-sources/clinvar"}},c=[{value:"Overview",id:"overview",children:[],level:2},{value:"ISCA Regions",id:"isca-regions",children:[{value:"TSV Extraction",id:"tsv-extraction",children:[{value:"Status levels",id:"status-levels",children:[],level:4},{value:"Parsing",id:"parsing",children:[],level:4}],level:3}],level:2},{value:"Conflict Resolution",id:"conflict-resolution",children:[{value:"Clinical significance 
priority",id:"clinical-significance-priority",children:[],level:3},{value:"Validation Priority",id:"validation-priority",children:[],level:3},{value:"Download URL",id:"download-url",children:[],level:3},{value:"JSON Output",id:"json-output",children:[],level:3}],level:2},{value:"Dosage Sensitivity Map",id:"dosage-sensitivity-map",children:[{value:"TSV Source files",id:"tsv-source-files",children:[],level:3},{value:"Dosage Rating System",id:"dosage-rating-system",children:[],level:3},{value:"Download URL",id:"download-url-1",children:[],level:3},{value:"JSON Output",id:"json-output-1",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files",children:[],level:3}],level:2},{value:"Gene-Disease Validity",id:"gene-disease-validity",children:[{value:"Source TSV",id:"source-tsv",children:[],level:3},{value:"Download URL",id:"download-url-2",children:[],level:3},{value:"Conflict Resolution",id:"conflict-resolution-1",children:[{value:"Multiple Classifications",id:"multiple-classifications",children:[],level:4},{value:"Multiple Dates",id:"multiple-dates",children:[],level:4}],level:3},{value:"JSON Output",id:"json-output-2",children:[],level:3},{value:"Building the supplementary files",id:"building-the-supplementary-files-1",children:[],level:3}],level:2}],u={toc:c},m="wrapper";function g(e){let{components:t,...n}=e;return(0,i.kt)(m,(0,a.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,i.kt)("h2",{id:"overview"},"Overview"),(0,i.kt)("p",null,"ClinGen is a National Institutes of Health (NIH)-funded resource dedicated to building a central resource that defines the clinical relevance of genes and variants for use in precision medicine and research."),(0,i.kt)("div",{className:"admonition admonition-info alert 
alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Heidi L. Rehm, Ph.D., Jonathan S. Berg, M.D., Ph.D., Lisa D. Brooks, Ph.D., Carlos D. Bustamante, Ph.D., James P. Evans, M.D., Ph.D., Melissa J. Landrum, Ph.D., David H. Ledbetter, Ph.D., Donna R. Maglott, Ph.D., Christa Lese Martin, Ph.D., Robert L. Nussbaum, M.D., Sharon E. Plon, M.D., Ph.D., Erin M. Ramos, Ph.D., Stephen T. Sherry, Ph.D., and Michael S. Watson, Ph.D., for ClinGen. 
",(0,i.kt)("strong",{parentName:"p"},"ClinGen The Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"N Engl J Med 2015; 372:2235-2242 June 4, 2015 DOI: 10.1056/NEJMsr1406261.")))),(0,i.kt)("h2",{id:"isca-regions"},"ISCA Regions"),(0,i.kt)("h3",{id:"tsv-extraction"},"TSV Extraction"),(0,i.kt)("p",null,"ClinGen contains only copy number variation variants, since the coordinates in ClinGen original file follow the same rule as BED format, the coordinates had to be adjusted to ","[BEGIN+1, END]","."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#bin chrom chromStart chromEnd name score strand thickStart thickEnd attrCount attrTags attrVals\nnsv530705 1 564405 8597804 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530706 1 564424 3262790 0 1 copy_number_loss pathogenic False Abnormal facial shape,Abnormality of cardiac morphology,Global developmental delay,Muscular hypotonia HP:0001252,HP:0001263,HP:0001627,HP:0001999,MedGen:CN001147,MedGen:CN001157,MedGen:CN001482,MedGen:CN001810\nnsv530707 1 564424 7068738 0 1 copy_number_loss pathogenic False Abnormality of cardiac morphology,Cleft upper lip,Failure to thrive,Global developmental delay,Intrauterine growth retardation,Microcephaly,Short stature HP:0000204,HP:0000252,HP:0001263,HP:0001508,HP:0001511,HP:0001627,HP:0004322,MedGen:C0349588,MedGen:C1845868,MedGen:C1853481,MedGen:C2364119,MedGen:CN000197,MedGen:CN001157,MedGen:CN001482\nnsv533512 1 564435 649748 0 1 copy_number_loss benign False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv931338 1 714078 4958499 0 1 copy_number_loss pathogenic False Developmental delay AND/OR other significant developmental or morphological phenotypes\nnsv530300 1 728138 5066371 1 0 copy_number_gain pathogenic False Abnormality of cardiac morphology,Cleft palate,Global developmental delay 
HP:0000175,HP:0001263,HP:0001627,MedGen:C2240378,MedGen:CN001157,MedGen:CN001482\n")),(0,i.kt)("h4",{id:"status-levels"},"Status levels"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"benign"),(0,i.kt)("li",{parentName:"ul"},"curated benign"),(0,i.kt)("li",{parentName:"ul"},"curated pathogenic"),(0,i.kt)("li",{parentName:"ul"},"likely benign"),(0,i.kt)("li",{parentName:"ul"},"likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"path gain"),(0,i.kt)("li",{parentName:"ul"},"path loss"),(0,i.kt)("li",{parentName:"ul"},"pathogenic"),(0,i.kt)("li",{parentName:"ul"},"uncertain")),(0,i.kt)("h4",{id:"parsing"},"Parsing"),(0,i.kt)("p",null,"We parse the ClinGen tsv file and extract the following:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"chrom"),(0,i.kt)("li",{parentName:"ul"},"chromStart (note this is a 0-based coordinate)"),(0,i.kt)("li",{parentName:"ul"},"chromEnd"),(0,i.kt)("li",{parentName:"ul"},"attrTags"),(0,i.kt)("li",{parentName:"ul"},"attrVals")),(0,i.kt)("p",null,(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," are comma-separated lists. ",(0,i.kt)("inlineCode",{parentName:"p"},"attrTags")," contains the field keys and ",(0,i.kt)("inlineCode",{parentName:"p"},"attrVals")," contains the field values. 
We will parse the following keys from the two fields:"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"parent (this will be used as the ID in our JSON output)"),(0,i.kt)("li",{parentName:"ul"},"clinical_int"),(0,i.kt)("li",{parentName:"ul"},"validated"),(0,i.kt)("li",{parentName:"ul"},"phenotype (this should be a string array)"),(0,i.kt)("li",{parentName:"ul"},"phenotype_id (this should be a string array)")),(0,i.kt)("p",null,"Observed losses and observed gains will be calculated from entries that share a common parent ID."),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"variants with a common parent ID and same coordinates are grouped",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"calculated observed losses, observed gains for each group"),(0,i.kt)("li",{parentName:"ul"},"Clinical significance and validation status are collapsed using the priority strategy described below"))),(0,i.kt)("li",{parentName:"ul"},"Variants with the same parent ID can have different coordinates (mapped to hg38)",(0,i.kt)("ul",{parentName:"li"},(0,i.kt)("li",{parentName:"ul"},"nsv491508 : chr14:105583663-106881350 and chr14:105605043-106766076 (only one example)"),(0,i.kt)("li",{parentName:"ul"},"we kept both variants")))),(0,i.kt)("h2",{id:"conflict-resolution"},"Conflict Resolution"),(0,i.kt)("h3",{id:"clinical-significance-priority"},"Clinical significance priority"),(0,i.kt)("p",null,"When there are a mixture of variants belonging to the same parent ID, we will choose the most pathogenic clinical significance from the available values. i.e. 
if 3 samples were deemed pathogenic and 2 samples were likely pathogenic, we would list the variant as pathogenic."),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Priority")," (high to low)"),(0,i.kt)("ul",null,(0,i.kt)("li",{parentName:"ul"},"Priority"),(0,i.kt)("li",{parentName:"ul"},"Pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Likely pathogenic"),(0,i.kt)("li",{parentName:"ul"},"Benign"),(0,i.kt)("li",{parentName:"ul"},"Likely benign"),(0,i.kt)("li",{parentName:"ul"},"Uncertain significance")),(0,i.kt)("h3",{id:"validation-priority"},"Validation Priority"),(0,i.kt)("p",null,"When there is a mixture of variants belonging to the same parent ID, we will set the validation status to true if any of the variants were validated."),(0,i.kt)("h3",{id:"download-url"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite"},"https://cirm.ucsc.edu/cgi-bin/hgTrackUi?db=hg19&g=iscaComposite")),(0,i.kt)("h3",{id:"json-output"},"JSON Output"),(0,i.kt)(l.default,{mdxType:"CLINGENJSON"}),(0,i.kt)("h2",{id:"dosage-sensitivity-map"},"Dosage Sensitivity Map"),(0,i.kt)("p",null,"The Clinical Genome Resource (ClinGen) consortium is curating genes and regions of the genome to assess whether there is evidence to support that these genes/regions are dosage sensitive and should be targeted on a cytogenomic array. 
Nirvana reports these annotations for overlapping SVs."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Riggs ER, Nelson T, Merz A, Ackley T, Bunke B, Collins CD, Collinson MN, Fan YS, Goodenberger ML, Golden DM, Haglund-Hazy L, Krgovic D, Lamb AN, Lewis Z, Li G, Liu Y, Meck J, Neufeld-Kaiser W, Runke CK, Sanmann JN, Stavropoulos DJ, Strong E, Su M, Tayeh MK, Kokalj Vokac N, Thorland EC, Andersen E, Martin CL. ",(0,i.kt)("strong",{parentName:"p"},"Copy number variant discrepancy resolution using the ClinGen dosage sensitivity map results in updated clinical interpretations in ClinVar.")," ",(0,i.kt)("em",{parentName:"p"},"Hum Mutat. 2018 Nov;39(11):1650-1659. doi: 10.1002/humu.23610. 
PMID: 30095202; PMCID: PMC7374944.")))),(0,i.kt)("h3",{id:"tsv-source-files"},"TSV Source files"),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Regions")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Region Curation Results\n#07 May,2019\n#Genomic Locations are reported on GRCh38 (hg38): GCF_000001405.36\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_region.cgi?id=key\n#ISCA ID ISCA Region Name cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nISCA-46299 Xp11.22 region (includes HUWE1) Xp11.22 tbd 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 22840365 20655035 26692240 2018-11-19\nISCA-46295 15q13.3 recurrent region (D-CHRNA7 to BP5) (includes CHRNA7 and OTUD7A) 15q13.3 chr15:31727418-32153204 3 Sufficient evidence for dosage pathogenicity 19898479 20236110 22775350 40 Dosage sensitivity unlikely 26968334 22420048 2018-05-10\nISCA-46291 7q11.23 recurrent distal region (includes HIP1, YWHAG) 7q11.23 chr7:75528718-76433859 2 Some evidence for dosage pathogenicity 21109226 16971481 1 Little evidence for dosage pathogenicity 21109226 27867344 2018-12-31\nISCA-46290 Xp11.22p11.23 recurrent region (includes SHROOM4) Xp11.22-p11.23 chrX: 48447780-52444264 0 No evidence available 3 Sufficient evidence for dosage pathogenicity 19716111 21418194 25425167 2017-12-14 300801\n")),(0,i.kt)("p",null,(0,i.kt)("strong",{parentName:"p"},"Genes")),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"#ClinGen Gene Curation Results\n#24 May,2019\n#Genomic Locations are reported on GRCh37 
(hg19): GCF_000001405.13\n#https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen\n#to create link: https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/clingen_gene.cgi?sym=Gene Symbol\n#Gene Symbol Gene ID cytoBand Genomic Location Haploinsufficiency Score Haploinsufficiency Description Haploinsufficiency PMID1 Haploinsufficiency PMID2 Haploinsufficiency PMID3 Triplosensitivity Score Triplosensitivity Description Triplosensitivity PMID1 Triplosensitivity PMID2 Triplosensitivity PMID3 Date Last Evaluated Loss phenotype OMIM ID Triplosensitive phenotype OMIM ID\nA4GALT 53947 22q13.2 chr22:43088121-43117307 30 Gene associated with autosomal recessive phenotype 0 No evidence available 2014-12-11 111400\nAAGAB 79719 15q23 chr15:67493013-67547536 3 Sufficient evidence for dosage pathogenicity 23064416 23000146 0 No evidence available 2013-02-28 148600\n")),(0,i.kt)("h3",{id:"dosage-rating-system"},"Dosage Rating System"),(0,i.kt)("table",null,(0,i.kt)("thead",{parentName:"table"},(0,i.kt)("tr",{parentName:"thead"},(0,i.kt)("th",{parentName:"tr",align:null},"Rating"),(0,i.kt)("th",{parentName:"tr",align:null},"Possible Clinical Interpretation"))),(0,i.kt)("tbody",{parentName:"table"},(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"0"),(0,i.kt)("td",{parentName:"tr",align:null},"No evidence to suggest that dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"1"),(0,i.kt)("td",{parentName:"tr",align:null},"Little evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"2"),(0,i.kt)("td",{parentName:"tr",align:null},"Emerging evidence suggesting dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"3"),(0,i.kt)("td",{parentName:"tr",align:null},"Sufficient evidence suggesting 
dosage sensitivity is associated with clinical phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"30"),(0,i.kt)("td",{parentName:"tr",align:null},"Gene associated with autosomal recessive phenotype")),(0,i.kt)("tr",{parentName:"tbody"},(0,i.kt)("td",{parentName:"tr",align:null},"40"),(0,i.kt)("td",{parentName:"tr",align:null},"Dosage sensitivity unlikely")))),(0,i.kt)("p",null,"Reference: ",(0,i.kt)("a",{parentName:"p",href:"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml"},"https://www.ncbi.nlm.nih.gov/projects/dbvar/clingen/help.shtml")),(0,i.kt)("h3",{id:"download-url-1"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"ftp://ftp.clinicalgenome.org/"},"ftp://ftp.clinicalgenome.org/")),(0,i.kt)("h3",{id:"json-output-1"},"JSON Output"),(0,i.kt)(r.default,{mdxType:"ClinGenDosageJson"}),(0,i.kt)("h3",{id:"building-the-supplementary-files"},"Building the supplementary files"),(0,i.kt)("p",null,"The gene dosage sensitivity ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," for Nirvana can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DosageSensitivity")," subcommand. 
The required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"ClinGen_gene_curation_list_{ASSEMBLY}.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen Dosage Sensitivity Map\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet NirvanaBuild/SAUtils.dll DosageSensitivity\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll dosagesensitivity [options]\nCreates a gene annotation database from dbVar data\n\nOPTIONS:\n --tsv, -t input tsv file\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet NirvanaBuild/SAUtils.dll DosageSensitivity --out SupplementaryDatabase/64/GRCh37 --tsv ClinGen_gene_curation_list_GRCh37.tsv\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\n\nTime: 00:00:00.1\n")),(0,i.kt)("p",null,"For building the ",(0,i.kt)("inlineCode",{parentName:"p"},".nsi")," files, we use the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DosageMapRegions")," subcommand. 
The required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"ClinGen_region_curation_list_{ASSEMBLY}.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen Dosage Sensitivity Map\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Dosage sensitivity map from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"dotnet NirvanaBuild/SAUtils.dll DosageMapRegions \n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll dosagemapregions [options]\nCreates an interval annotation database from dbVar data\n\nOPTIONS:\n --tsv, -t input tsv file\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet NirvanaBuild/SAUtils.dll DosageMapRegions --out SupplementaryDatabase/64/GRCh37 --ref References/7/Homo_sapiens.GRCh37.Nirvana.dat --tsv ClinGen_region_curation_list_GRCh37.tsv\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nWriting 505 intervals to database...\n\nTime: 00:00:00.1\n")),(0,i.kt)("h2",{id:"gene-disease-validity"},"Gene-Disease Validity"),(0,i.kt)("p",null,"The ClinGen Gene-Disease Clinical Validity curation process involves evaluating the strength of evidence supporting or refuting a claim that variation in a particular gene causes a particular disease. 
Nirvana reports these annotations for genes in the genes section of the JSON."),(0,i.kt)("div",{className:"admonition admonition-info alert alert--info"},(0,i.kt)("div",{parentName:"div",className:"admonition-heading"},(0,i.kt)("h5",{parentName:"div"},(0,i.kt)("span",{parentName:"h5",className:"admonition-icon"},(0,i.kt)("svg",{parentName:"span",xmlns:"http://www.w3.org/2000/svg",width:"14",height:"16",viewBox:"0 0 14 16"},(0,i.kt)("path",{parentName:"svg",fillRule:"evenodd",d:"M7 2.3c3.14 0 5.7 2.56 5.7 5.7s-2.56 5.7-5.7 5.7A5.71 5.71 0 0 1 1.3 8c0-3.14 2.56-5.7 5.7-5.7zM7 1C3.14 1 0 4.14 0 8s3.14 7 7 7 7-3.14 7-7-3.14-7-7-7zm1 3H6v5h2V4zm0 6H6v2h2v-2z"}))),"Publication")),(0,i.kt)("div",{parentName:"div",className:"admonition-content"},(0,i.kt)("p",{parentName:"div"},"Strande NT, Riggs ER, Buchanan AH, et al. ",(0,i.kt)("strong",{parentName:"p"},"Evaluating the Clinical Validity of Gene-Disease Associations: An Evidence-Based Framework Developed by the Clinical Genome Resource.")," ",(0,i.kt)("em",{parentName:"p"},"Am J Hum Genet. 2017;100(6):895-906. 
doi:10.1016/j.ajhg.2017.04.015")))),(0,i.kt)("h3",{id:"source-tsv"},"Source TSV"),(0,i.kt)("p",null,"The source data comes in a CSV file that we convert to a TSV."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"CLINGEN GENE VALIDITY CURATIONS\nFILE CREATED: 2019-05-28\nWEBPAGE: https://search.clinicalgenome.org/kb/gene-validity\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nGENE SYMBOL,GENE ID (HGNC),DISEASE LABEL,DISEASE ID (MONDO),SOP,CLASSIFICATION,ONLINE REPORT,CLASSIFICATION DATE\n+++++++++++,++++++++++++++,+++++++++++++,++++++++++++++++++,+++++++++,++++++++++++++,+++++++++++++,+++++++++++++++++++\nA2ML1,HGNC:23336,Noonan syndrome with multiple lentigines,MONDO_0007893,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/59b87033-dd91-4f1e-aec1-c9b1f5124b16--2018-06-07T14:37:47,2018-06-07T14:37:47.175Z\nA2ML1,HGNC:23336,cardiofaciocutaneous syndrome,MONDO_0015280,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/fc3c41d8-8497-489b-a350-c9e30016bc6a--2018-06-07T14:31:03,2018-06-07T14:31:03.696Z\nA2ML1,HGNC:23336,Costello syndrome,MONDO_0009026,SOP5,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/ea72ba8d-cf62-44bc-86be-da64e3848eba--2018-06-07T14:34:05,2018-06-07T14:34:05.324Z\n")),(0,i.kt)("h3",{id:"download-url-2"},"Download URL"),(0,i.kt)("p",null,(0,i.kt)("a",{parentName:"p",href:"https://search.clinicalgenome.org/kb/downloads#section_gene-disease-validity"},"https://search.clinicalgenome.org/kb/downloads#section_gene-disease-validity")),(0,i.kt)("h3",{id:"conflict-resolution-1"},"Conflict Resolution"),(0,i.kt)("h4",{id:"multiple-classifications"},"Multiple Classifications"),(0,i.kt)("p",null,"Here is an example of multiple classifications."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0010192 
ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep EDNRB\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Moderate,https://search.clinicalgenome.org/kb/gene-validity/d7abbd45-7915-437b-849b-dea876bfc2f5--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\nEDNRB,HGNC:3180,Waardenburg syndrome type 4A,MONDO_0010192,SOP6,Limited,https://search.clinicalgenome.org/kb/gene-validity/73ee9727-60c1-40fd-830f-08c2b513d2ee--2018-05-08T04:00:00,2018-05-08T04:00:00.000Z\n")),(0,i.kt)("p",null,"In such cases, we select the more severe classification."),(0,i.kt)("h4",{id:"multiple-dates"},"Multiple Dates"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"$ grep MONDO_0016419 ClinGen-Gene-Disease-Summary-2019-12-02.csv | grep MUTYH\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9904,2017-05-24T00:00:00\nMUTYH,HGNC:7527,hereditary breast carcinoma,MONDO_0016419,SOP4,No Reported Evidence,https://search.clinicalgenome.org/kb/gene-validity/9902,2017-05-25T00:00:00\n")),(0,i.kt)("p",null,"If the classifications are the same, we should select the latest classification date."),(0,i.kt)("h3",{id:"json-output-2"},"JSON Output"),(0,i.kt)(o.default,{mdxType:"ClinGenGeneValidity"}),(0,i.kt)("h3",{id:"building-the-supplementary-files-1"},"Building the supplementary files"),(0,i.kt)("p",null,"The gene disease validity ",(0,i.kt)("inlineCode",{parentName:"p"},".nga")," for Nirvana can be built using the ",(0,i.kt)("inlineCode",{parentName:"p"},"SAUtils")," command's ",(0,i.kt)("inlineCode",{parentName:"p"},"DiseaseValidity")," subcommand. 
The only required data file is ",(0,i.kt)("inlineCode",{parentName:"p"},"Clingen-Gene-Disease-Summary-2021-12-01.tsv")," (url provided above) and its associated ",(0,i.kt)("inlineCode",{parentName:"p"},".version")," file."),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"},"NAME=ClinGen disease validity curations\nVERSION=20211201\nDATE=2021-12-01\nDESCRIPTION=Disease validity curations from ClinGen (dbVar)\n")),(0,i.kt)("p",null,"Here is a sample run:"),(0,i.kt)("pre",null,(0,i.kt)("code",{parentName:"pre",className:"language-scss"}," dotnet NirvanaBuild/SAUtils.dll DiseaseValidity\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nUSAGE: dotnet SAUtils.dll diseasevalidity [options]\nCreates a gene annotation database from ClinGen gene validity data\n\nOPTIONS:\n --csv, -i ClinGen gene validity file path\n --cache, -c \n input cache directory\n --ref, -r input reference filename\n --out, -o output directory\n --help, -h displays the help menu\n --version, -v displays the version\n\ndotnet NirvanaBuild/SAUtils.dll DiseaseValidity --tsv Clingen-Gene-Disease-Summary-2021-12-01.tsv \\\\\n--uga Cache --out SupplementaryDatabase\n---------------------------------------------------------------------------\nSAUtils (c) 2023 Illumina, Inc.\nStromberg, Roy, Platzer, Siddiqui, Ouyang, et al 3.21.0-0-gd2a0e953\n---------------------------------------------------------------------------\n\nNumber of geneIds missing from the cache:0 (0%)\n\nTime: 00:00:00.2\n")))}g.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/main.0de008ff.js b/assets/js/main.0de008ff.js new file mode 100644 index 00000000..c3551493 --- /dev/null +++ b/assets/js/main.0de008ff.js @@ -0,0 +1,2 @@ +/*! 
For license information please see main.0de008ff.js.LICENSE.txt */ +(self.webpackChunknirvana_documentation=self.webpackChunknirvana_documentation||[]).push([[179],{830:(e,t,n)=>{"use strict";n.d(t,{W:()=>o});var r=n(7294);function o(){return r.createElement("svg",{width:"20",height:"20",className:"DocSearch-Search-Icon",viewBox:"0 0 20 20"},r.createElement("path",{d:"M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z",stroke:"currentColor",fill:"none",fillRule:"evenodd",strokeLinecap:"round",strokeLinejoin:"round"}))}},9782:(e,t,n)=>{"use strict";n.r(t),n.d(t,{default:()=>r});const r={title:"IlluminaConnectedAnnotations",tagline:"Translational researcy-grade variant annotation",url:"https://illumina.github.io",baseUrl:"/IlluminaConnectedAnnotationsDocumentation/",onBrokenLinks:"throw",favicon:"img/favicon.ico",organizationName:"illumina",projectName:"IlluminaConnectedAnnotationsDocumentation",themeConfig:{gtag:{trackingID:"G-5KXNW9LCD7"},algolia:{apiKey:"e908c17192dca08b01d9d994b576335b",indexName:"illumina_nirvana",contextualSearch:!0,appId:"BH4D9OD16A",searchParameters:{}},colorMode:{defaultMode:"light",disableSwitch:!0,respectPrefersColorScheme:!1,switchConfig:{darkIcon:"\ud83c\udf1c",darkIconStyle:{},lightIcon:"\ud83c\udf1e",lightIconStyle:{}}},navbar:{logo:{src:"img/ICAnnotations.png"},items:[{type:"docsVersionDropdown",position:"right",dropdownActiveClassDisabled:!0,dropdownItemsAfter:[{to:"/versions",label:"All versions"}],dropdownItemsBefore:[]}],hideOnScroll:!1},footer:{style:"dark",copyright:"\xa9 2023 Illumina, Inc. 
All rights reserved.",links:[]},docs:{versionPersistence:"localStorage"},metadata:[],prism:{additionalLanguages:[]},hideableSidebar:!1,tableOfContents:{minHeadingLevel:2,maxHeadingLevel:3}},stylesheets:["https://fonts.googleapis.com/css2?family=Open+Sans&family=Raleway&family=Source+Code+Pro&display=swap"],presets:[["@docusaurus/preset-classic",{docs:{routeBasePath:"/",sidebarPath:"/Users/zli3/Workspace/IlluminaConnectedAnnotationsDocumentation/sidebars.js",editUrl:"https://github.com/Illumina/IlluminaConnectedAnnotationsDocumentation/edit/master/",lastVersion:"current",onlyIncludeVersions:["current"],versions:{current:{label:"3.22 (unreleased)"}}},theme:{customCss:"/Users/zli3/Workspace/IlluminaConnectedAnnotationsDocumentation/src/css/custom.css"}}]],baseUrlIssueBanner:!0,i18n:{defaultLocale:"en",locales:["en"],localeConfigs:{}},onBrokenMarkdownLinks:"warn",onDuplicateRoutes:"warn",staticDirectories:["static"],customFields:{},plugins:[],themes:[],titleDelimiter:"|",noIndex:!1}},2067:(e,t,n)=>{"use strict";var r=n(7294),o=n(3935),a=n(3727),i=n(8356),l=n.n(i);function s(e){let{error:t,retry:n,pastDelay:o}=e;return t?r.createElement("div",{style:{align:"center",color:"#fff",backgroundColor:"#fa383e",borderColor:"#fa383e",borderStyle:"solid",borderRadius:"0.25rem",borderWidth:"1px",boxSizing:"border-box",display:"block",padding:"1rem",flex:"0 0 50%",marginLeft:"25%",marginRight:"25%",marginTop:"5rem",maxWidth:"50%",width:"100%"}},r.createElement("p",null,t.message),r.createElement("div",null,r.createElement("button",{type:"button",onClick:n},"Retry"))):o?r.createElement("div",{style:{display:"flex",justifyContent:"center",alignItems:"center",height:"100vh"}},r.createElement("svg",{id:"loader",style:{width:128,height:110,position:"absolute",top:"calc(100vh - 64%)"},viewBox:"0 0 45 45",xmlns:"http://www.w3.org/2000/svg",stroke:"#61dafb"},r.createElement("g",{fill:"none",fillRule:"evenodd",transform:"translate(1 
1)",strokeWidth:"2"},r.createElement("circle",{cx:"22",cy:"22",r:"6",strokeOpacity:"0"},r.createElement("animate",{attributeName:"r",begin:"1.5s",dur:"3s",values:"6;22",calcMode:"linear",repeatCount:"indefinite"}),r.createElement("animate",{attributeName:"stroke-opacity",begin:"1.5s",dur:"3s",values:"1;0",calcMode:"linear",repeatCount:"indefinite"}),r.createElement("animate",{attributeName:"stroke-width",begin:"1.5s",dur:"3s",values:"2;0",calcMode:"linear",repeatCount:"indefinite"})),r.createElement("circle",{cx:"22",cy:"22",r:"6",strokeOpacity:"0"},r.createElement("animate",{attributeName:"r",begin:"3s",dur:"3s",values:"6;22",calcMode:"linear",repeatCount:"indefinite"}),r.createElement("animate",{attributeName:"stroke-opacity",begin:"3s",dur:"3s",values:"1;0",calcMode:"linear",repeatCount:"indefinite"}),r.createElement("animate",{attributeName:"stroke-width",begin:"3s",dur:"3s",values:"2;0",calcMode:"linear",repeatCount:"indefinite"})),r.createElement("circle",{cx:"22",cy:"22",r:"8"},r.createElement("animate",{attributeName:"r",begin:"0s",dur:"1.5s",values:"6;1;2;3;4;5;6",calcMode:"linear",repeatCount:"indefinite"}))))):null}const 
u=JSON.parse('{"/IlluminaConnectedAnnotationsDocumentation/blog/archive-192":{"component":"9e4087bc","archive":"7674fa56"},"/IlluminaConnectedAnnotationsDocumentation/search-f66":{"component":"d0cd84a1"},"/IlluminaConnectedAnnotationsDocumentation/versions-4b9":{"component":"18b93cb3","config":"5e9f5e1a"},"/IlluminaConnectedAnnotationsDocumentation/-cf7":{"component":"1be78505","versionMetadata":"935f2afb"},"/IlluminaConnectedAnnotationsDocumentation/-0a5":{"component":"17896441","content":"ef4059aa"},"/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts-0f3":{"component":"17896441","content":"463e69e4"},"/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions-9a5":{"component":"17896441","content":"e95cadfe"},"/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts-572":{"component":"17896441","content":"75a3a2eb"},"/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids-8d0":{"component":"17896441","content":"a5e136a1"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-6a1":{"component":"17896441","content":"a9ecceb6"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-snv-json-c39":{"component":"17896441","content":"9620026c"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-sv-json-4fd":{"component":"17896441","content":"440d17b3"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation-d35":{"component":"17896441","content":"7b3bfa5e"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation-json-8bf":{"component":"17896441","content":"a8504dcf"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/cancer-hotspots-b95":{"component":"17896441","content":"9a946f68"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-fa4":{"component":"17896441","content":"771fd362"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-dosage-j
son-58e":{"component":"17896441","content":"abda0f14"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-gene-validity-json-547":{"component":"17896441","content":"82e726f2"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-json-286":{"component":"17896441","content":"b4210c11"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar-bed":{"component":"17896441","content":"cd35fae7"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar-json-d9e":{"component":"17896441","content":"7bc16216"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-0af":{"component":"17896441","content":"08a089c6"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-cancer-gene-census-bb8":{"component":"17896441","content":"666ea911"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-gene-fusion-json-094":{"component":"17896441","content":"4397ec05"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-json-cbc":{"component":"17896441","content":"b6dcd8b7"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann-a22":{"component":"17896441","content":"988d0ae8"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann-json-5bd":{"component":"17896441","content":"57cffed1"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp-bd6":{"component":"17896441","content":"18946b76"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp-json-f1d":{"component":"17896441","content":"a8da062f"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher-569":{"component":"17896441","content":"5dd9300a"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher-json-f45":{"component":"17896441","content":"0be5de6c"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher-4e3":{"component":"17896441","content":"cd0802b4"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher-json-774":{"component":"17896441","content":"601929e3"}
,"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp-03f":{"component":"17896441","content":"f262a5f6"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp-json-c31":{"component":"17896441","content":"539175fb"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme-95e":{"component":"17896441","content":"07bac56e"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme-json-45b":{"component":"17896441","content":"f98a4229"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-6b3":{"component":"17896441","content":"98bbf06c"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-lof-json-249":{"component":"17896441","content":"833bd66e"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-small-variants-json-3d5":{"component":"17896441","content":"8ae16000"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-data_description-1e7":{"component":"17896441","content":"85047af6"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-json-57f":{"component":"17896441","content":"e39dd739"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-heteroplasmy-068":{"component":"17896441","content":"5d1e2784"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-540":{"component":"17896441","content":"5d851e34"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-small-variants-json-3d1":{"component":"17896441","content":"0bd2af6a"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-structural-variants-json-00d":{"component":"17896441","content":"494b7fcc"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim-7c3":{"component":"17896441","content":"5b7bb28d"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim-json-83f":{"component":"17896441","content":"644aa76c"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop-3ef":{"component":"17896441","content":"a26ba82d"},"/Illumi
naConnectedAnnotationsDocumentation/data-sources/phylop-json-98b":{"component":"17896441","content":"a2ab8500"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai-fc4":{"component":"17896441","content":"915fca76"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai-json-3e2":{"component":"17896441","content":"34e55124"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel-172":{"component":"17896441","content":"b51ccab7"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel-json-997":{"component":"17896441","content":"42c73b29"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai-7de":{"component":"17896441","content":"ba2982bf"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai-json-1d4":{"component":"17896441","content":"191d3c1c"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed-ea7":{"component":"17896441","content":"51ec9460"},"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed-json-014":{"component":"17896441","content":"cd8220b1"},"/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations-4b4":{"component":"17896441","content":"e286457f"},"/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format-76b":{"component":"17896441","content":"b2e466e8"},"/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies-1c5":{"component":"17896441","content":"f7e8c160"},"/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started-ab0":{"component":"17896441","content":"f048ed9e"},"/IlluminaConnectedAnnotationsDocumentation/introduction/parsing-json-67e":{"component":"17896441","content":"e1e7c361"},"/IlluminaConnectedAnnotationsDocumentation/utilities/jasix-b46":{"component":"17896441","content":"eef24e02"},"/IlluminaConnectedAnnotationsDocumentation/utilities/sautils-b75":{"component":"17896441","content":"2973af85"}}'),c={"07bac56e":[()=>n.e(1342).then(n.bind(n,2812)),"@site/docs/data-sourc
es/gme.mdx",2812],"08a089c6":[()=>n.e(3957).then(n.bind(n,1335)),"@site/docs/data-sources/cosmic.mdx",1335],"0bd2af6a":[()=>n.e(5160).then(n.bind(n,8181)),"@site/docs/data-sources/mitomap-small-variants-json.md",8181],"0be5de6c":[()=>n.e(1912).then(n.bind(n,4072)),"@site/docs/data-sources/decipher-json.md",4072],17896441:[()=>Promise.all([n.e(532),n.e(7918)]).then(n.bind(n,2319)),"@theme/DocItem",2319],"18946b76":[()=>n.e(3305).then(n.bind(n,4266)),"@site/docs/data-sources/dbsnp.mdx",4266],"18b93cb3":[()=>n.e(3042).then(n.bind(n,351)),"@site/src/pages/versions.js",351],"191d3c1c":[()=>n.e(4899).then(n.bind(n,9838)),"@site/docs/data-sources/splice-ai-json.md",9838],"1be78505":[()=>Promise.all([n.e(532),n.e(9514)]).then(n.bind(n,3042)),"@theme/DocPage",3042],"2973af85":[()=>n.e(5111).then(n.bind(n,224)),"@site/docs/utilities/sautils.mdx",224],"34e55124":[()=>n.e(7942).then(n.bind(n,737)),"@site/docs/data-sources/primate-ai-json.md",737],"42c73b29":[()=>n.e(2508).then(n.bind(n,591)),"@site/docs/data-sources/revel-json.md",591],"4397ec05":[()=>n.e(5360).then(n.bind(n,7997)),"@site/docs/data-sources/cosmic-gene-fusion-json.md",7997],"440d17b3":[()=>n.e(4648).then(n.bind(n,2590)),"@site/docs/data-sources/1000Genomes-sv-json.md",2590],"463e69e4":[()=>n.e(7278).then(n.bind(n,1027)),"@site/docs/core-functionality/canonical-transcripts.md",1027],"494b7fcc":[()=>n.e(8462).then(n.bind(n,8898)),"@site/docs/data-sources/mitomap-structural-variants-json.md",8898],"51ec9460":[()=>n.e(5697).then(n.bind(n,6891)),"@site/docs/data-sources/topmed.mdx",6891],"539175fb":[()=>n.e(5702).then(n.bind(n,5538)),"@site/docs/data-sources/gerp-json.md",5538],"57cffed1":[()=>n.e(6192).then(n.bind(n,540)),"@site/docs/data-sources/dann-json.md",540],"5b7bb28d":[()=>n.e(8943).then(n.bind(n,1927)),"@site/docs/data-sources/omim.mdx",1927],"5d1e2784":[()=>n.e(1311).then(n.bind(n,6762)),"@site/docs/data-sources/mito-heteroplasmy.md",6762],"5d851e34":[()=>n.e(7795).then(n.bind(n,7763)),"@site/docs/data-sou
rces/mitomap.mdx",7763],"5dd9300a":[()=>n.e(8907).then(n.bind(n,1389)),"@site/docs/data-sources/decipher.mdx",1389],"5e9f5e1a":[()=>Promise.resolve().then(n.bind(n,9782)),"@generated/docusaurus.config",9782],"601929e3":[()=>n.e(1266).then(n.bind(n,8202)),"@site/docs/data-sources/fusioncatcher-json.md",8202],"644aa76c":[()=>n.e(216).then(n.bind(n,8010)),"@site/docs/data-sources/omim-json.md",8010],"666ea911":[()=>n.e(6635).then(n.bind(n,1273)),"@site/docs/data-sources/cosmic-cancer-gene-census.md",1273],"75a3a2eb":[()=>n.e(9767).then(n.bind(n,1062)),"@site/docs/core-functionality/transcript-consequence-impacts.md",1062],"7674fa56":[()=>n.e(975).then(n.t.bind(n,3982,19)),"~blog/default/illumina-connected-annotations-documentation-blog-archive-009.json",3982],"771fd362":[()=>n.e(7850).then(n.bind(n,599)),"@site/docs/data-sources/clingen.mdx",599],"7b3bfa5e":[()=>n.e(3389).then(n.bind(n,1877)),"@site/docs/data-sources/amino-acid-conservation.mdx",1877],"7bc16216":[()=>n.e(3232).then(n.bind(n,212)),"@site/docs/data-sources/clinvar-json.md",212],"82e726f2":[()=>n.e(12).then(n.bind(n,949)),"@site/docs/data-sources/clingen-gene-validity-json.md",949],"833bd66e":[()=>n.e(9082).then(n.bind(n,4859)),"@site/docs/data-sources/gnomad-lof-json.md",4859],"85047af6":[()=>n.e(7860).then(n.bind(n,6335)),"@site/docs/data-sources/gnomad-structural-variants-data_description.md",6335],"8ae16000":[()=>n.e(4105).then(n.bind(n,3827)),"@site/docs/data-sources/gnomad-small-variants-json.md",3827],"915fca76":[()=>n.e(9639).then(n.bind(n,3556)),"@site/docs/data-sources/primate-ai.mdx",3556],"935f2afb":[()=>n.e(53).then(n.t.bind(n,1109,19)),"~docs/default/version-current-metadata-prop-751.json",1109],"9620026c":[()=>n.e(6602).then(n.bind(n,1888)),"@site/docs/data-sources/1000Genomes-snv-json.md",1888],"988d0ae8":[()=>n.e(472).then(n.bind(n,5771)),"@site/docs/data-sources/dann.mdx",5771],"98bbf06c":[()=>n.e(4858).then(n.bind(n,1106)),"@site/docs/data-sources/gnomad.mdx",1106],"9a946f68":[()=>n.e(5
198).then(n.bind(n,6959)),"@site/docs/data-sources/cancer-hotspots.mdx",6959],"9e4087bc":[()=>n.e(3608).then(n.bind(n,3012)),"@theme/BlogArchivePage",3012],a26ba82d:[()=>n.e(7706).then(n.bind(n,1702)),"@site/docs/data-sources/phylop.mdx",1702],a2ab8500:[()=>n.e(2865).then(n.bind(n,4133)),"@site/docs/data-sources/phylop-json.md",4133],a5e136a1:[()=>n.e(8111).then(n.bind(n,3814)),"@site/docs/core-functionality/variant-ids.md",3814],a8504dcf:[()=>n.e(1633).then(n.bind(n,9729)),"@site/docs/data-sources/amino-acid-conservation-json.md",9729],a8da062f:[()=>n.e(2630).then(n.bind(n,9156)),"@site/docs/data-sources/dbsnp-json.md",9156],a9ecceb6:[()=>n.e(4203).then(n.bind(n,7234)),"@site/docs/data-sources/1000Genomes.mdx",7234],abda0f14:[()=>n.e(829).then(n.bind(n,7356)),"@site/docs/data-sources/clingen-dosage-json.md",7356],b2e466e8:[()=>n.e(8577).then(n.bind(n,120)),"@site/docs/file-formats/illumina-annotator-json-file-format.mdx",120],b4210c11:[()=>n.e(7870).then(n.bind(n,4674)),"@site/docs/data-sources/clingen-json.md",4674],b51ccab7:[()=>n.e(611).then(n.bind(n,1562)),"@site/docs/data-sources/revel.mdx",1562],b6dcd8b7:[()=>n.e(6458).then(n.bind(n,525)),"@site/docs/data-sources/cosmic-json.md",525],ba2982bf:[()=>n.e(2038).then(n.bind(n,8295)),"@site/docs/data-sources/splice-ai.mdx",8295],cd0802b4:[()=>n.e(1144).then(n.bind(n,3468)),"@site/docs/data-sources/fusioncatcher.mdx",3468],cd35fae7:[()=>n.e(5490).then(n.bind(n,1396)),"@site/docs/data-sources/clinvar.mdx",1396],cd8220b1:[()=>n.e(4246).then(n.bind(n,9819)),"@site/docs/data-sources/topmed-json.md",9819],d0cd84a1:[()=>Promise.all([n.e(532),n.e(8955)]).then(n.bind(n,9172)),"/Users/zli3/Workspace/IlluminaConnectedAnnotationsDocumentation/node_modules/@docusaurus/theme-search-algolia/lib/theme/SearchPage/index.js",9172],e1e7c361:[()=>n.e(1443).then(n.bind(n,2791)),"@site/docs/introduction/parsing-json.md",2791],e286457f:[()=>n.e(4773).then(n.bind(n,19)),"@site/docs/file-formats/custom-annotations.md",19],e39dd739:[()=>n.e(
3805).then(n.bind(n,818)),"@site/docs/data-sources/gnomad-structural-variants-json.md",818],e95cadfe:[()=>n.e(5277).then(n.bind(n,1533)),"@site/docs/core-functionality/gene-fusions.md",1533],eef24e02:[()=>n.e(4974).then(n.bind(n,6220)),"@site/docs/utilities/jasix.mdx",6220],ef4059aa:[()=>n.e(3790).then(n.bind(n,3273)),"@site/docs/introduction/introduction.mdx",3273],f048ed9e:[()=>n.e(9962).then(n.bind(n,5675)),"@site/docs/introduction/getting-started.md",5675],f262a5f6:[()=>n.e(6969).then(n.bind(n,1969)),"@site/docs/data-sources/gerp.mdx",1969],f7e8c160:[()=>n.e(700).then(n.bind(n,1043)),"@site/docs/introduction/dependencies.md",1043],f98a4229:[()=>n.e(8633).then(n.bind(n,8036)),"@site/docs/data-sources/gme-json.md",8036]};const d=function(e){const t={};return function e(n,r){Object.keys(n).forEach((o=>{const a=n[o],i=r?`${r}.${o}`:o;var l;"object"==typeof(l=a)&&l&&Object.keys(l).length>0?e(a,i):t[i]=a}))}(e),t};const f=function(e,t){if("*"===e)return l()({loading:s,loader:()=>n.e(4608).then(n.bind(n,4608))});const o=u[`${e}-${t}`],a=[],i=[],f={},p=d(o);return Object.keys(p).forEach((e=>{const t=c[p[e]];t&&(f[e]=t[0],a.push(t[1]),i.push(t[2]))})),l().Map({loading:s,loader:f,modules:a,webpack:()=>i,render:(e,t)=>{const n=JSON.parse(JSON.stringify(o));Object.keys(e).forEach((t=>{let r=n;const o=t.split(".");for(let e=0;e"default"!==e));a&&a.length&&a.forEach((n=>{r[o[o.length-1]][n]=e[t][n]}))}));const a=n.component;return delete 
n.component,r.createElement(a,{...n,...t})}})},p=[{path:"/IlluminaConnectedAnnotationsDocumentation/blog/archive",component:f("/IlluminaConnectedAnnotationsDocumentation/blog/archive","192"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/search",component:f("/IlluminaConnectedAnnotationsDocumentation/search","f66"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/versions",component:f("/IlluminaConnectedAnnotationsDocumentation/versions","4b9"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/",component:f("/IlluminaConnectedAnnotationsDocumentation/","cf7"),routes:[{path:"/IlluminaConnectedAnnotationsDocumentation/",component:f("/IlluminaConnectedAnnotationsDocumentation/","0a5"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts",component:f("/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts","0f3"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions",component:f("/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions","9a5"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts",component:f("/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts","572"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids",component:f("/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids","8d0"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes","6a1"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-snv-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-snv-json","c39"),exact:!0}
,{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-sv-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-sv-json","4fd"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation","d35"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation-json","8bf"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cancer-hotspots",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/cancer-hotspots","b95"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen","fa4"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-dosage-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-dosage-json","58e"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-gene-validity-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-gene-validity-json","547"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-json","286"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar","bed"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar-json","d9e"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic",co
mponent:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic","0af"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-cancer-gene-census",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-cancer-gene-census","bb8"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-gene-fusion-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-gene-fusion-json","094"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-json","cbc"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/dann","a22"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/dann-json","5bd"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp","bd6"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp-json","f1d"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher","569"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher-json","f45"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher","4e3"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-source
s/fusioncatcher-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher-json","774"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp","03f"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp-json","c31"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/gme","95e"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/gme-json","45b"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad","6b3"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-lof-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-lof-json","249"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-small-variants-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-small-variants-json","3d5"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-data_description",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-data_description","1e7"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-json","57f"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-heteroplasmy",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-hete
roplasmy","068"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap","540"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-small-variants-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-small-variants-json","3d1"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-structural-variants-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-structural-variants-json","00d"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/omim","7c3"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/omim-json","83f"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop","3ef"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop-json","98b"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai","fc4"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai-json","3e2"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/revel","172"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel-json",component:f("/IlluminaConn
ectedAnnotationsDocumentation/data-sources/revel-json","997"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai","7de"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai-json","1d4"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed","ea7"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed-json",component:f("/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed-json","014"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations",component:f("/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations","4b4"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format",component:f("/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format","76b"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies",component:f("/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies","1c5"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started",component:f("/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started","ab0"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnectedAnnotationsDocumentation/introduction/parsing-json",component:f("/IlluminaConnectedAnnotationsDocumentation/introduction/parsing-json","67e"),exact:!0},{path:"/IlluminaConnectedAnnotationsDocumentation/utilities/jasix",component:f("/IlluminaConnectedAnnotationsDocumentation/utilities/jasix","b46"),exact:!0,sidebar:"docs"},{path:"/IlluminaConnected
AnnotationsDocumentation/utilities/sautils",component:f("/IlluminaConnectedAnnotationsDocumentation/utilities/sautils","b75"),exact:!0,sidebar:"docs"}]},{path:"*",component:f("*")}];var m=n(412),h=n(6291),g=n(9913),v=n(7041),b=n(6550),y=n(4865),w=n.n(y);const k=[n(2497),n(2448),n(6743),n(2295)];function E(e){for(var t=arguments.length,n=new Array(t>1?t-1:0),r=1;r{var r,o;const a=null!==(o=null===(r=null==t?void 0:t.default)||void 0===r?void 0:r[e])&&void 0!==o?o:t[e];a&&a(...n)}))}const S={onRouteUpdate(){for(var e=arguments.length,t=new Array(e),n=0;n{const{component:t}=e.route;if(t&&t.preload)return t.preload()})))}const A={};const _=function(e){if(A[e.pathname])return{...e,pathname:A[e.pathname]};let t=e.pathname||"/";return t=t.trim().replace(/\/index\.html$/,""),""===t&&(t="/"),A[e.pathname]=t,{...e,pathname:t}};w().configure({showSpinner:!1});class D extends r.Component{constructor(e){super(e),this.previousLocation=null,this.progressBarTimeout=null,this.state={nextRouteHasLoaded:!0}}shouldComponentUpdate(e,t){const n=e.location!==this.props.location,{routes:r,delay:o}=this.props;if(n){const t=_(e.location);return this.startProgressBar(o),this.previousLocation=_(this.props.location),this.setState({nextRouteHasLoaded:!1}),x(r,t.pathname).then((()=>{S.onRouteUpdate({previousLocation:this.previousLocation,location:t}),this.previousLocation=null,this.setState({nextRouteHasLoaded:!0},this.stopProgressBar);const{hash:e}=t;if(e){const t=decodeURIComponent(e.substring(1)),n=document.getElementById(t);n&&n.scrollIntoView()}else 
window.scrollTo(0,0)})).catch((e=>console.warn(e))),!1}return!!t.nextRouteHasLoaded}clearProgressBarTimeout(){this.progressBarTimeout&&(clearTimeout(this.progressBarTimeout),this.progressBarTimeout=null)}startProgressBar(e){this.clearProgressBarTimeout(),this.progressBarTimeout=setTimeout((()=>{S.onRouteUpdateDelayed({location:_(this.props.location)}),w().start()}),e)}stopProgressBar(){this.clearProgressBarTimeout(),w().done()}render(){const{children:e,location:t}=this.props;return r.createElement(b.AW,{location:_(t),render:()=>e})}}const T=(0,b.EN)(D);var I=n(2859),O=n(2263);const P="docusaurus-base-url-issue-banner-container",L="docusaurus-base-url-issue-banner",R="docusaurus-base-url-issue-banner-suggestion-container",N="__DOCUSAURUS_INSERT_BASEURL_BANNER";function j(e){return`\nwindow['${N}'] = true;\n\ndocument.addEventListener('DOMContentLoaded', maybeInsertBanner);\n\nfunction maybeInsertBanner() {\n var shouldInsert = window['${N}'];\n shouldInsert && insertBanner();\n}\n\nfunction insertBanner() {\n var bannerContainer = document.getElementById('${P}');\n if (!bannerContainer) {\n return;\n }\n var bannerHtml = ${JSON.stringify(function(e){return`\n
\n

Your Docusaurus site did not load properly.

\n

A very common reason is a wrong site baseUrl configuration.

\n

Current configured baseUrl = ${e} ${"/"===e?" (default value)":""}

\n

We suggest trying baseUrl =

\n
\n`}(e)).replace(/{window[N]=!1}),[]),r.createElement(r.Fragment,null,!m.Z.canUseDOM&&r.createElement(I.Z,null,r.createElement("script",null,j(e))),r.createElement("div",{id:P}))}function F(){const{siteConfig:{baseUrl:e,baseUrlIssueBanner:t}}=(0,O.Z)(),{pathname:n}=(0,b.TH)();return t&&n===e?r.createElement(M,null):null}const B=function(e){let{children:t}=e;return t};var U=n(780),z=n(4953);const $=function(){return r.createElement(U.Z,{fallback:z.Z},r.createElement(v.M,null,r.createElement(g.t,null,r.createElement(B,null,r.createElement(F,null),r.createElement(T,{routes:p,delay:1e3},(0,h.Z)(p))))))};const q=function(e){if("undefined"==typeof document)return!1;const t=document.createElement("link");try{if(t.relList&&"function"==typeof t.relList.supports)return t.relList.supports(e)}catch(n){return!1}return!1}("prefetch")?function(e){return new Promise(((t,n)=>{if("undefined"==typeof document)return void n();const r=document.createElement("link");r.setAttribute("rel","prefetch"),r.setAttribute("href",e),r.onload=t,r.onerror=n;(document.getElementsByTagName("head")[0]||document.getElementsByName("script")[0].parentNode).appendChild(r)}))}:function(e){return new Promise(((t,n)=>{const r=new XMLHttpRequest;r.open("GET",e,!0),r.withCredentials=!0,r.onload=()=>{200===r.status?t():n()},r.send(null)}))},G={};const Z=function(e){return new Promise((t=>{G[e]?t():q(e).then((()=>{t(),G[e]=!0})).catch((()=>{}))}))},H={},V={},W=()=>{var e,t;return(null===(e=navigator.connection)||void 0===e?void 0:e.effectiveType.includes("2g"))&&(null===(t=navigator.connection)||void 0===t?void 0:t.saveData)},K={prefetch:e=>{if(!(e=>!W()&&!V[e]&&!H[e])(e))return!1;H[e]=!0;return(0,C.f)(p,e).flatMap((e=>{return t=e.route.path,Object.entries(u).filter((e=>{let[n]=e;return n.replace(/(-[^-]+)$/,"")===t})).flatMap((e=>{let[,t]=e;return Object.values(d(t))}));var t})).forEach((e=>{const 
t=n.gca(e);t&&!/undefined/.test(t)&&Z(t)})),!0},preload:e=>!!(e=>!W()&&!V[e])(e)&&(V[e]=!0,x(p,e),!0)};if(m.Z.canUseDOM){window.docusaurus=K;const e=o.hydrate;x(p,window.location.pathname).then((()=>{e(r.createElement(a.VK,null,r.createElement($,null)),document.getElementById("__docusaurus"))}))}},780:(e,t,n)=>{"use strict";n.d(t,{Z:()=>l});var r=n(7294),o=n(412),a=n(4953);class i extends r.Component{constructor(e){super(e),this.state={error:null}}componentDidCatch(e){o.Z.canUseDOM&&this.setState({error:e})}render(){var e;const{children:t}=this.props,{error:n}=this.state;if(n){return(null!==(e=this.props.fallback)&&void 0!==e?e:a.Z)({error:n,tryAgain:()=>this.setState({error:null})})}return t}}const l=i},412:(e,t,n)=>{"use strict";n.d(t,{Z:()=>o});const r=!("undefined"==typeof window||!window.document||!window.document.createElement),o={canUseDOM:r,canUseEventListeners:r&&!(!window.addEventListener&&!window.attachEvent),canUseIntersectionObserver:r&&"IntersectionObserver"in window,canUseViewport:r&&!!window.screen}},2859:(e,t,n)=>{"use strict";n.d(t,{Z:()=>me});var r,o,a,i,l=n(7294),s=n(5697),u=n.n(s),c=n(3524),d=n.n(c),f=n(9590),p=n.n(f),m=n(7418),h=n.n(m),g="bodyAttributes",v="htmlAttributes",b="titleAttributes",y={BASE:"base",BODY:"body",HEAD:"head",HTML:"html",LINK:"link",META:"meta",NOSCRIPT:"noscript",SCRIPT:"script",STYLE:"style",TITLE:"title"},w=(Object.keys(y).map((function(e){return y[e]})),"charset"),k="cssText",E="href",S="http-equiv",C="innerHTML",x="itemprop",A="name",_="property",D="rel",T="src",I="target",O={accesskey:"accessKey",charset:"charSet",class:"className",contenteditable:"contentEditable",contextmenu:"contextMenu","http-equiv":"httpEquiv",itemprop:"itemProp",tabindex:"tabIndex"},P="defaultTitle",L="defer",R="encodeSpecialCharacters",N="onChangeClientState",j="titleTemplate",M=Object.keys(O).reduce((function(e,t){return e[O[t]]=t,e}),{}),F=[y.NOSCRIPT,y.SCRIPT,y.STYLE],B="data-react-helmet",U="function"==typeof Symbol&&"symbol"==typeof 
Symbol.iterator?function(e){return typeof e}:function(e){return e&&"function"==typeof Symbol&&e.constructor===Symbol&&e!==Symbol.prototype?"symbol":typeof e},z=function(){function e(e,t){for(var n=0;n=0||Object.prototype.hasOwnProperty.call(e,r)&&(n[r]=e[r]);return n},G=function(e){return!1===(!(arguments.length>1&&void 0!==arguments[1])||arguments[1])?String(e):String(e).replace(/&/g,"&").replace(//g,">").replace(/"/g,""").replace(/'/g,"'")},Z=function(e){var t=Y(e,y.TITLE),n=Y(e,j);if(n&&t)return n.replace(/%s/g,(function(){return Array.isArray(t)?t.join(""):t}));var r=Y(e,P);return t||r||void 0},H=function(e){return Y(e,N)||function(){}},V=function(e,t){return t.filter((function(t){return void 0!==t[e]})).map((function(t){return t[e]})).reduce((function(e,t){return $({},e,t)}),{})},W=function(e,t){return t.filter((function(e){return void 0!==e[y.BASE]})).map((function(e){return e[y.BASE]})).reverse().reduce((function(t,n){if(!t.length)for(var r=Object.keys(n),o=0;o=0;n--){var r=e[n];if(r.hasOwnProperty(t))return r[t]}return null},Q=(r=Date.now(),function(e){var t=Date.now();t-r>16?(r=t,e(t)):setTimeout((function(){Q(e)}),0)}),X=function(e){return clearTimeout(e)},J="undefined"!=typeof window?window.requestAnimationFrame&&window.requestAnimationFrame.bind(window)||window.webkitRequestAnimationFrame||window.mozRequestAnimationFrame||Q:n.g.requestAnimationFrame||Q,ee="undefined"!=typeof window?window.cancelAnimationFrame||window.webkitCancelAnimationFrame||window.mozCancelAnimationFrame||X:n.g.cancelAnimationFrame||X,te=function(e){return console&&"function"==typeof console.warn&&console.warn(e)},ne=null,re=function(e,t){var n=e.baseTag,r=e.bodyAttributes,o=e.htmlAttributes,a=e.linkTags,i=e.metaTags,l=e.noscriptTags,s=e.onChangeClientState,u=e.scriptTags,c=e.styleTags,d=e.title,f=e.titleAttributes;ie(y.BODY,r),ie(y.HTML,o),ae(d,f);var 
p={baseTag:le(y.BASE,n),linkTags:le(y.LINK,a),metaTags:le(y.META,i),noscriptTags:le(y.NOSCRIPT,l),scriptTags:le(y.SCRIPT,u),styleTags:le(y.STYLE,c)},m={},h={};Object.keys(p).forEach((function(e){var t=p[e],n=t.newTags,r=t.oldTags;n.length&&(m[e]=n),r.length&&(h[e]=p[e].oldTags)})),t&&t(),s(e,m,h)},oe=function(e){return Array.isArray(e)?e.join(""):e},ae=function(e,t){void 0!==e&&document.title!==e&&(document.title=oe(e)),ie(y.TITLE,t)},ie=function(e,t){var n=document.getElementsByTagName(e)[0];if(n){for(var r=n.getAttribute(B),o=r?r.split(","):[],a=[].concat(o),i=Object.keys(t),l=0;l=0;d--)n.removeAttribute(a[d]);o.length===a.length?n.removeAttribute(B):n.getAttribute(B)!==i.join(",")&&n.setAttribute(B,i.join(","))}},le=function(e,t){var n=document.head||document.querySelector(y.HEAD),r=n.querySelectorAll(e+"["+B+"]"),o=Array.prototype.slice.call(r),a=[],i=void 0;return t&&t.length&&t.forEach((function(t){var n=document.createElement(e);for(var r in t)if(t.hasOwnProperty(r))if(r===C)n.innerHTML=t.innerHTML;else if(r===k)n.styleSheet?n.styleSheet.cssText=t.cssText:n.appendChild(document.createTextNode(t.cssText));else{var l=void 0===t[r]?"":t[r];n.setAttribute(r,l)}n.setAttribute(B,"true"),o.some((function(e,t){return i=t,n.isEqualNode(e)}))?o.splice(i,1):a.push(n)})),o.forEach((function(e){return e.parentNode.removeChild(e)})),a.forEach((function(e){return n.appendChild(e)})),{oldTags:o,newTags:a}},se=function(e){return Object.keys(e).reduce((function(t,n){var r=void 0!==e[n]?n+'="'+e[n]+'"':""+n;return t?t+" "+r:r}),"")},ue=function(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};return Object.keys(e).reduce((function(t,n){return t[O[n]||n]=e[n],t}),t)},ce=function(e,t,n){switch(e){case y.TITLE:return{toComponent:function(){return e=t.title,n=t.titleAttributes,(r={key:e})[B]=!0,o=ue(n,r),[l.createElement(y.TITLE,o,e)];var e,n,r,o},toString:function(){return function(e,t,n,r){var o=se(n),a=oe(t);return o?"<"+e+" "+B+'="true" 
'+o+">"+G(a,r)+"":"<"+e+" "+B+'="true">'+G(a,r)+""}(e,t.title,t.titleAttributes,n)}};case g:case v:return{toComponent:function(){return ue(t)},toString:function(){return se(t)}};default:return{toComponent:function(){return function(e,t){return t.map((function(t,n){var r,o=((r={key:n})[B]=!0,r);return Object.keys(t).forEach((function(e){var n=O[e]||e;if(n===C||n===k){var r=t.innerHTML||t.cssText;o.dangerouslySetInnerHTML={__html:r}}else o[n]=t[e]})),l.createElement(e,o)}))}(e,t)},toString:function(){return function(e,t,n){return t.reduce((function(t,r){var o=Object.keys(r).filter((function(e){return!(e===C||e===k)})).reduce((function(e,t){var o=void 0===r[t]?t:t+'="'+G(r[t],n)+'"';return e?e+" "+o:o}),""),a=r.innerHTML||r.cssText||"",i=-1===F.indexOf(e);return t+"<"+e+" "+B+'="true" '+o+(i?"/>":">"+a+"")}),"")}(e,t,n)}}}},de=function(e){var t=e.baseTag,n=e.bodyAttributes,r=e.encode,o=e.htmlAttributes,a=e.linkTags,i=e.metaTags,l=e.noscriptTags,s=e.scriptTags,u=e.styleTags,c=e.title,d=void 0===c?"":c,f=e.titleAttributes;return{base:ce(y.BASE,t,r),bodyAttributes:ce(g,n,r),htmlAttributes:ce(v,o,r),link:ce(y.LINK,a,r),meta:ce(y.META,i,r),noscript:ce(y.NOSCRIPT,l,r),script:ce(y.SCRIPT,s,r),style:ce(y.STYLE,u,r),title:ce(y.TITLE,{title:d,titleAttributes:f},r)}},fe=d()((function(e){return{baseTag:W([E,I],e),bodyAttributes:V(g,e),defer:Y(e,L),encode:Y(e,R),htmlAttributes:V(v,e),linkTags:K(y.LINK,[D,E],e),metaTags:K(y.META,[A,w,S,_,x],e),noscriptTags:K(y.NOSCRIPT,[C],e),onChangeClientState:H(e),scriptTags:K(y.SCRIPT,[T,C],e),styleTags:K(y.STYLE,[k],e),title:Z(e),titleAttributes:V(b,e)}}),(function(e){ne&&ee(ne),e.defer?ne=J((function(){re(e,(function(){ne=null}))})):(re(e),ne=null)}),de)((function(){return null})),pe=(o=fe,i=a=function(e){function t(){return function(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}(this,t),function(e,t){if(!e)throw new ReferenceError("this hasn't been initialised - super() hasn't been 
called");return!t||"object"!=typeof t&&"function"!=typeof t?e:t}(this,e.apply(this,arguments))}return function(e,t){if("function"!=typeof t&&null!==t)throw new TypeError("Super expression must either be null or a function, not "+typeof t);e.prototype=Object.create(t&&t.prototype,{constructor:{value:e,enumerable:!1,writable:!0,configurable:!0}}),t&&(Object.setPrototypeOf?Object.setPrototypeOf(e,t):e.__proto__=t)}(t,e),t.prototype.shouldComponentUpdate=function(e){return!p()(this.props,e)},t.prototype.mapNestedChildrenToProps=function(e,t){if(!t)return null;switch(e.type){case y.SCRIPT:case y.NOSCRIPT:return{innerHTML:t};case y.STYLE:return{cssText:t}}throw new Error("<"+e.type+" /> elements are self-closing and can not contain children. Refer to our API for more information.")},t.prototype.flattenArrayTypeChildren=function(e){var t,n=e.child,r=e.arrayTypeChildren,o=e.newChildProps,a=e.nestedChildren;return $({},r,((t={})[n.type]=[].concat(r[n.type]||[],[$({},o,this.mapNestedChildrenToProps(n,a))]),t))},t.prototype.mapObjectTypeChildren=function(e){var t,n,r=e.child,o=e.newProps,a=e.newChildProps,i=e.nestedChildren;switch(r.type){case y.TITLE:return $({},o,((t={})[r.type]=i,t.titleAttributes=$({},a),t));case y.BODY:return $({},o,{bodyAttributes:$({},a)});case y.HTML:return $({},o,{htmlAttributes:$({},a)})}return $({},o,((n={})[r.type]=$({},a),n))},t.prototype.mapArrayTypeChildrenToProps=function(e,t){var n=$({},t);return Object.keys(e).forEach((function(t){var r;n=$({},n,((r={})[t]=e[t],r))})),n},t.prototype.warnOnInvalidChildren=function(e,t){return!0},t.prototype.mapChildrenToProps=function(e,t){var n=this,r={};return l.Children.forEach(e,(function(e){if(e&&e.props){var o=e.props,a=o.children,i=function(e){var t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:{};return Object.keys(e).reduce((function(t,n){return t[M[n]||n]=e[n],t}),t)}(q(o,["children"]));switch(n.warnOnInvalidChildren(e,a),e.type){case y.LINK:case y.META:case y.NOSCRIPT:case y.SCRIPT:case 
y.STYLE:r=n.flattenArrayTypeChildren({child:e,arrayTypeChildren:r,newChildProps:i,nestedChildren:a});break;default:t=n.mapObjectTypeChildren({child:e,newProps:t,newChildProps:i,nestedChildren:a})}}})),t=this.mapArrayTypeChildrenToProps(r,t)},t.prototype.render=function(){var e=this.props,t=e.children,n=q(e,["children"]),r=$({},n);return t&&(r=this.mapChildrenToProps(t,r)),l.createElement(o,r)},z(t,null,[{key:"canUseDOM",set:function(e){o.canUseDOM=e}}]),t}(l.Component),a.propTypes={base:u().object,bodyAttributes:u().object,children:u().oneOfType([u().arrayOf(u().node),u().node]),defaultTitle:u().string,defer:u().bool,encodeSpecialCharacters:u().bool,htmlAttributes:u().object,link:u().arrayOf(u().object),meta:u().arrayOf(u().object),noscript:u().arrayOf(u().object),onChangeClientState:u().func,script:u().arrayOf(u().object),style:u().arrayOf(u().object),title:u().string,titleAttributes:u().object,titleTemplate:u().string},a.defaultProps={defer:!0,encodeSpecialCharacters:!0},a.peek=o.peek,a.rewind=function(){var e=o.rewind();return e||(e=de({baseTag:[],bodyAttributes:{},encodeSpecialCharacters:!0,htmlAttributes:{},linkTags:[],metaTags:[],noscriptTags:[],scriptTags:[],styleTags:[],title:"",titleAttributes:{}})),e},i);pe.renderStatic=pe.rewind;const me=function(e){return l.createElement(pe,{...e})}},9960:(e,t,n)=>{"use strict";n.d(t,{Z:()=>d});var r=n(7294),o=n(3727),a=n(2263),i=n(3919),l=n(412);const s=(0,r.createContext)({collectLink:()=>{}});var u=n(4996),c=n(8780);const d=function(e){let{isNavLink:t,to:n,href:d,activeClassName:f,isActive:p,"data-noBrokenLinkCheck":m,autoAddBaseUrl:h=!0,...g}=e;var v;const{siteConfig:{trailingSlash:b,baseUrl:y}}=(0,a.Z)(),{withBaseUrl:w}=(0,u.C)(),k=(0,r.useContext)(s),E=n||d,S=(0,i.Z)(E),C=null==E?void 0:E.replace("pathname://","");let x=void 0!==C?(A=C,h&&(e=>e.startsWith("/"))(A)?w(A):A):void 0;var A;x&&S&&(x=(0,c.applyTrailingSlash)(x,{trailingSlash:b,baseUrl:y}));const 
_=(0,r.useRef)(!1),D=t?o.OL:o.rU,T=l.Z.canUseIntersectionObserver,I=(0,r.useRef)();(0,r.useEffect)((()=>(!T&&S&&null!=x&&window.docusaurus.prefetch(x),()=>{T&&I.current&&I.current.disconnect()})),[I,x,T,S]);const O=null!==(v=null==x?void 0:x.startsWith("#"))&&void 0!==v&&v,P=!x||!S||O;return x&&S&&!O&&!m&&k.collectLink(x),P?r.createElement("a",{href:x,...E&&!S&&{target:"_blank",rel:"noopener noreferrer"},...g}):r.createElement(D,{...g,onMouseEnter:()=>{_.current||null==x||(window.docusaurus.preload(x),_.current=!0)},innerRef:e=>{var t,n;T&&e&&S&&(t=e,n=()=>{null!=x&&window.docusaurus.prefetch(x)},I.current=new window.IntersectionObserver((e=>{e.forEach((e=>{t===e.target&&(e.isIntersecting||e.intersectionRatio>0)&&(I.current.unobserve(t),I.current.disconnect(),n())}))})),I.current.observe(t))},to:x||"",...t&&{isActive:p,activeClassName:f}})}},5999:(e,t,n)=>{"use strict";n.d(t,{Z:()=>c,I:()=>u});var r=n(7294);const o=/{\w+}/g,a="{}";function i(e,t){const n=[],i=e.replace(o,(e=>{const o=e.substring(1,e.length-1),i=null==t?void 0:t[o];if(void 0!==i){const e=r.isValidElement(i)?i:String(i);return n.push(e),a}return e}));return 0===n.length?e:n.every((e=>"string"==typeof e))?i.split(a).reduce(((e,t,r)=>{var o;return e.concat(t).concat(null!==(o=n[r])&&void 0!==o?o:"")}),""):i.split(a).reduce(((e,t,o)=>[...e,r.createElement(r.Fragment,{key:o},t,n[o])]),[])}var l=n(7529);function s(e){let{id:t,message:n}=e;var r,o;if(void 0===t&&void 0===n)throw new Error("Docusaurus translation declarations must have at least a translation id or a default translation message");return null!==(o=null!==(r=l[null!=t?t:n])&&void 0!==r?r:n)&&void 0!==o?o:t}function u(e,t){let{message:n,id:r}=e;return i(s({message:n,id:r}),t)}function c(e){let{children:t,id:n,values:r}=e;if(t&&"string"!=typeof t)throw console.warn("Illegal children",t),new Error("The Docusaurus component only accept simple string values");return i(s({message:t,id:n}),r)}},9913:(e,t,n)=>{"use strict";n.d(t,{_:()=>o,t:()=>a});var 
r=n(7294);const o=r.createContext(!1);function a(e){let{children:t}=e;const[n,a]=(0,r.useState)(!1);return(0,r.useEffect)((()=>{a(!0)}),[]),r.createElement(o.Provider,{value:n},t)}},9935:(e,t,n)=>{"use strict";n.d(t,{m:()=>r});const r="default"},7041:(e,t,n)=>{"use strict";n.d(t,{_:()=>c,M:()=>d});var r=n(7294),o=n(9782);const a=JSON.parse('{"docusaurus-plugin-content-docs":{"default":{"path":"/IlluminaConnectedAnnotationsDocumentation/","versions":[{"name":"current","label":"3.22 (unreleased)","isLast":true,"path":"/IlluminaConnectedAnnotationsDocumentation/","mainDocId":"introduction/introduction","docs":[{"id":"core-functionality/canonical-transcripts","path":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/canonical-transcripts","sidebar":"docs"},{"id":"core-functionality/gene-fusions","path":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/gene-fusions","sidebar":"docs"},{"id":"core-functionality/transcript-consequence-impacts","path":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/transcript-consequence-impacts","sidebar":"docs"},{"id":"core-functionality/variant-ids","path":"/IlluminaConnectedAnnotationsDocumentation/core-functionality/variant-ids","sidebar":"docs"},{"id":"data-sources/1000Genomes","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes","sidebar":"docs"},{"id":"data-sources/1000Genomes-snv-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-snv-json"},{"id":"data-sources/1000Genomes-sv-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/1000Genomes-sv-json"},{"id":"data-sources/amino-acid-conservation","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation","sidebar":"docs"},{"id":"data-sources/amino-acid-conservation-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/amino-acid-conservation-json"},{"id":"data-sources/cancer-hotspots","path":"/IlluminaConnectedAnnotationsDocumentat
ion/data-sources/cancer-hotspots","sidebar":"docs"},{"id":"data-sources/clingen","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen","sidebar":"docs"},{"id":"data-sources/clingen-dosage-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-dosage-json"},{"id":"data-sources/clingen-gene-validity-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-gene-validity-json"},{"id":"data-sources/clingen-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/clingen-json"},{"id":"data-sources/clinvar","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar","sidebar":"docs"},{"id":"data-sources/clinvar-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/clinvar-json"},{"id":"data-sources/cosmic","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic","sidebar":"docs"},{"id":"data-sources/cosmic-cancer-gene-census","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-cancer-gene-census"},{"id":"data-sources/cosmic-gene-fusion-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-gene-fusion-json"},{"id":"data-sources/cosmic-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/cosmic-json"},{"id":"data-sources/dann","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann","sidebar":"docs"},{"id":"data-sources/dann-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/dann-json"},{"id":"data-sources/dbsnp","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp","sidebar":"docs"},{"id":"data-sources/dbsnp-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/dbsnp-json"},{"id":"data-sources/decipher","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher","sidebar":"docs"},{"id":"data-sources/decipher-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/decipher-json"},{"id":"data-sources/fusionca
tcher","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher","sidebar":"docs"},{"id":"data-sources/fusioncatcher-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/fusioncatcher-json"},{"id":"data-sources/gerp","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp","sidebar":"docs"},{"id":"data-sources/gerp-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gerp-json"},{"id":"data-sources/gme","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme","sidebar":"docs"},{"id":"data-sources/gme-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gme-json"},{"id":"data-sources/gnomad","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad","sidebar":"docs"},{"id":"data-sources/gnomad-lof-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-lof-json"},{"id":"data-sources/gnomad-small-variants-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-small-variants-json"},{"id":"data-sources/gnomad-structural-variants-data_description","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-data_description"},{"id":"data-sources/gnomad-structural-variants-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/gnomad-structural-variants-json"},{"id":"data-sources/mito-heteroplasmy","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/mito-heteroplasmy","sidebar":"docs"},{"id":"data-sources/mitomap","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap","sidebar":"docs"},{"id":"data-sources/mitomap-small-variants-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-small-variants-json"},{"id":"data-sources/mitomap-structural-variants-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/mitomap-structural-variants-json"},{"id":"data-sources/omim","path":"/IlluminaConnectedAnnotationsDocumentat
ion/data-sources/omim","sidebar":"docs"},{"id":"data-sources/omim-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/omim-json"},{"id":"data-sources/phylop","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop","sidebar":"docs"},{"id":"data-sources/phylop-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/phylop-json"},{"id":"data-sources/primate-ai","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai","sidebar":"docs"},{"id":"data-sources/primate-ai-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/primate-ai-json"},{"id":"data-sources/revel","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel","sidebar":"docs"},{"id":"data-sources/revel-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/revel-json"},{"id":"data-sources/splice-ai","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai","sidebar":"docs"},{"id":"data-sources/splice-ai-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/splice-ai-json"},{"id":"data-sources/topmed","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed","sidebar":"docs"},{"id":"data-sources/topmed-json","path":"/IlluminaConnectedAnnotationsDocumentation/data-sources/topmed-json"},{"id":"file-formats/custom-annotations","path":"/IlluminaConnectedAnnotationsDocumentation/file-formats/custom-annotations","sidebar":"docs"},{"id":"file-formats/illumina-annotator-json-file-format","path":"/IlluminaConnectedAnnotationsDocumentation/file-formats/illumina-annotator-json-file-format","sidebar":"docs"},{"id":"introduction/dependencies","path":"/IlluminaConnectedAnnotationsDocumentation/introduction/dependencies","sidebar":"docs"},{"id":"introduction/getting-started","path":"/IlluminaConnectedAnnotationsDocumentation/introduction/getting-started","sidebar":"docs"},{"id":"introduction/introduction","path":"/IlluminaConnectedAnnotationsDocumentation/","sidebar":"d
ocs"},{"id":"introduction/parsing-json","path":"/IlluminaConnectedAnnotationsDocumentation/introduction/parsing-json"},{"id":"utilities/jasix","path":"/IlluminaConnectedAnnotationsDocumentation/utilities/jasix","sidebar":"docs"},{"id":"utilities/sautils","path":"/IlluminaConnectedAnnotationsDocumentation/utilities/sautils","sidebar":"docs"}]}]}}}'),i=JSON.parse('{"defaultLocale":"en","locales":["en"],"currentLocale":"en","localeConfigs":{"en":{"label":"English","direction":"ltr"}}}');var l=n(7529);const s=JSON.parse('{"docusaurusVersion":"2.0.0-beta.13","siteVersion":"0.0.0","pluginVersions":{"docusaurus-plugin-content-docs":{"type":"package","name":"@docusaurus/plugin-content-docs","version":"2.0.0-beta.13"},"docusaurus-plugin-content-blog":{"type":"package","name":"@docusaurus/plugin-content-blog","version":"2.0.0-beta.13"},"docusaurus-plugin-content-pages":{"type":"package","name":"@docusaurus/plugin-content-pages","version":"2.0.0-beta.13"},"docusaurus-plugin-sitemap":{"type":"package","name":"@docusaurus/plugin-sitemap","version":"2.0.0-beta.13"},"docusaurus-theme-classic":{"type":"package","name":"@docusaurus/theme-classic","version":"2.0.0-beta.13"},"docusaurus-theme-search-algolia":{"type":"package","name":"@docusaurus/theme-search-algolia","version":"2.0.0-beta.13"}}}'),u={siteConfig:o.default,siteMetadata:s,globalData:a,i18n:i,codeTranslations:l},c=r.createContext(u);function d(e){let{children:t}=e;return r.createElement(c.Provider,{value:u},t)}},3919:(e,t,n)=>{"use strict";function r(e){return!0===/^(\w*:|\/\/)/.test(e)}function o(e){return void 0!==e&&!r(e)}n.d(t,{Z:()=>o,b:()=>r})},6291:(e,t,n)=>{"use strict";n.d(t,{Z:()=>r});const r=n(8790).H},8143:(e,t,n)=>{"use 
strict";n.r(t),n.d(t,{BrowserRouter:()=>r.VK,HashRouter:()=>r.UT,Link:()=>r.rU,MemoryRouter:()=>r.VA,NavLink:()=>r.OL,Prompt:()=>r.NL,Redirect:()=>r.l_,Route:()=>r.AW,Router:()=>r.F0,StaticRouter:()=>r.gx,Switch:()=>r.rs,generatePath:()=>r.Gn,matchPath:()=>r.LX,useHistory:()=>r.k6,useLocation:()=>r.TH,useParams:()=>r.UO,useRouteMatch:()=>r.$B,withRouter:()=>r.EN});var r=n(3727)},4996:(e,t,n)=>{"use strict";n.d(t,{C:()=>a,Z:()=>i});var r=n(2263),o=n(3919);function a(){const{siteConfig:{baseUrl:e="/",url:t}={}}=(0,r.Z)();return{withBaseUrl:(n,r)=>function(e,t,n,r){let{forcePrependBaseUrl:a=!1,absolute:i=!1}=void 0===r?{}:r;if(!n)return n;if(n.startsWith("#"))return n;if((0,o.b)(n))return n;if(a)return t+n;const l=n.startsWith(t)?n:t+n.replace(/^\//,"");return i?e+l:l}(t,e,n,r)}}function i(e,t){void 0===t&&(t={});const{withBaseUrl:n}=a();return n(e,t)}},2263:(e,t,n)=>{"use strict";n.d(t,{Z:()=>a});var r=n(7294),o=n(7041);const a=function(){return(0,r.useContext)(o._)}},8084:(e,t,n)=>{"use strict";n.r(t),n.d(t,{default:()=>a,useAllPluginInstancesData:()=>i,usePluginData:()=>l});var r=n(2263),o=n(9935);function a(){const{globalData:e}=(0,r.Z)();if(!e)throw new Error("Docusaurus global data not found.");return e}function i(e){const t=a()[e];if(!t)throw new Error(`Docusaurus plugin global data not found for "${e}" plugin.`);return t}function l(e,t){void 0===t&&(t=o.m);const n=i(e)[t];if(!n)throw new Error(`Docusaurus plugin global data not found for "${e}" plugin with id "${t}".`);return n}},2389:(e,t,n)=>{"use strict";n.d(t,{Z:()=>a});var r=n(7294),o=n(9913);function a(){return(0,r.useContext)(o._)}},4953:(e,t,n)=>{"use strict";n.d(t,{Z:()=>l});var r=n(7294),o=n(8882),a=n(780);function i(e){let{error:t,tryAgain:n}=e;return r.createElement("div",{style:{display:"flex",flexDirection:"column",justifyContent:"center",alignItems:"center",height:"50vh",width:"100%",fontSize:"20px"}},r.createElement("h1",null,"This page 
crashed."),r.createElement("p",null,t.message),r.createElement("button",{type:"button",onClick:n},"Try again"))}const l=function(e){let{error:t,tryAgain:n}=e;return r.createElement(a.Z,{fallback:()=>r.createElement(i,{error:t,tryAgain:n})},r.createElement(o.Z,{title:"Page Error"},r.createElement(i,{error:t,tryAgain:n})))}},8408:(e,t,n)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.getDocVersionSuggestions=t.getActiveDocContext=t.getActiveVersion=t.getLatestVersion=t.getActivePlugin=void 0;const r=n(8143);t.getActivePlugin=function(e,t,n){void 0===n&&(n={});const o=Object.entries(e).find((e=>{let[n,o]=e;return!!(0,r.matchPath)(t,{path:o.path,exact:!1,strict:!1})})),a=o?{pluginId:o[0],pluginData:o[1]}:void 0;if(!a&&n.failfast)throw new Error(`Can't find active docs plugin for "${t}" pathname, while it was expected to be found. Maybe you tried to use a docs feature that can only be used on a docs-related page? Existing docs plugin paths are: ${Object.values(e).map((e=>e.path)).join(", ")}`);return a};t.getLatestVersion=e=>e.versions.find((e=>e.isLast));t.getActiveVersion=(e,n)=>{const o=(0,t.getLatestVersion)(e);return[...e.versions.filter((e=>e!==o)),o].find((e=>!!(0,r.matchPath)(n,{path:e.path,exact:!1,strict:!1})))};t.getActiveDocContext=(e,n)=>{const o=(0,t.getActiveVersion)(e,n),a=null==o?void 0:o.docs.find((e=>!!(0,r.matchPath)(n,{path:e.path,exact:!0,strict:!1})));return{activeVersion:o,activeDoc:a,alternateDocVersions:a?function(t){const n={};return e.versions.forEach((e=>{e.docs.forEach((r=>{r.id===t&&(n[e.name]=r)}))})),n}(a.id):{}}};t.getDocVersionSuggestions=(e,n)=>{const r=(0,t.getLatestVersion)(e),o=(0,t.getActiveDocContext)(e,n);return{latestDocSuggestion:null==o?void 0:o.alternateDocVersions[r.name],latestVersionSuggestion:r}}},6730:(e,t,n)=>{"use strict";t.Jo=t.Iw=t.zu=t.yW=t.gB=t.WS=t.gA=t.zh=t._r=void 0;const r=n(7582),o=n(8143),a=(0,r.__importStar)(n(8084)),i=n(8408),l={};t._r=()=>{var e;return 
null!==(e=(0,a.default)()["docusaurus-plugin-content-docs"])&&void 0!==e?e:l};t.zh=e=>(0,a.usePluginData)("docusaurus-plugin-content-docs",e);t.gA=function(e){void 0===e&&(e={});const n=(0,t._r)(),{pathname:r}=(0,o.useLocation)();return(0,i.getActivePlugin)(n,r,e)};t.WS=function(e){void 0===e&&(e={});const n=(0,t.gA)(e),{pathname:r}=(0,o.useLocation)();if(n){return{activePlugin:n,activeVersion:(0,i.getActiveVersion)(n.pluginData,r)}}};t.gB=e=>(0,t.zh)(e).versions;t.yW=e=>{const n=(0,t.zh)(e);return(0,i.getLatestVersion)(n)};t.zu=e=>{const n=(0,t.zh)(e),{pathname:r}=(0,o.useLocation)();return(0,i.getActiveVersion)(n,r)};t.Iw=e=>{const n=(0,t.zh)(e),{pathname:r}=(0,o.useLocation)();return(0,i.getActiveDocContext)(n,r)};t.Jo=e=>{const n=(0,t.zh)(e),{pathname:r}=(0,o.useLocation)();return(0,i.getDocVersionSuggestions)(n,r)}},541:(e,t,n)=>{"use strict";n.d(t,{Z:()=>a});var r=n(7294);const o="iconExternalLink_wgqa";const a=function(e){let{width:t=13.5,height:n=13.5}=e;return r.createElement("svg",{width:t,height:n,"aria-hidden":"true",viewBox:"0 0 24 24",className:o},r.createElement("path",{fill:"currentColor",d:"M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"}))}},8882:(e,t,n)=>{"use strict";n.d(t,{Z:()=>xe});var r=n(7294),o=n(6010),a=n(780),i=n(6550),l=n(5999),s=n(3810);const u="skipToContent_OuoZ";function c(e){e.setAttribute("tabindex","-1"),e.focus(),e.removeAttribute("tabindex")}const d=function(){const e=(0,r.useRef)(null),{action:t}=(0,i.k6)();return(0,s.SL)((n=>{let{location:r}=n;e.current&&!r.hash&&"PUSH"===t&&c(e.current)})),r.createElement("div",{ref:e},r.createElement("a",{href:"#",className:u,onClick:e=>{e.preventDefault();const t=document.querySelector("main:first-of-type")||document.querySelector(".main-wrapper");t&&c(t)}},r.createElement(l.Z,{id:"theme.common.skipToMainContent",description:"The skip to content label used for accessibility, allowing to rapidly navigate to main content with 
keyboard tab/enter navigation"},"Skip to main content")))};var f=n(7462);function p(e){let{width:t=21,height:n=21,color:o="currentColor",strokeWidth:a=1.2,className:i,...l}=e;return r.createElement("svg",(0,f.Z)({viewBox:"0 0 15 15",width:t,height:n},l),r.createElement("g",{stroke:o,strokeWidth:a},r.createElement("path",{d:"M.75.75l13.5 13.5M14.25.75L.75 14.25"})))}const m="announcementBar_axC9",h="announcementBarPlaceholder_xYHE",g="announcementBarClose_A3A1",v="announcementBarContent_6uhP";const b=function(){const{isActive:e,close:t}=(0,s.nT)(),{announcementBar:n}=(0,s.LU)();if(!e)return null;const{content:a,backgroundColor:i,textColor:u,isCloseable:c}=n;return r.createElement("div",{className:m,style:{backgroundColor:i,color:u},role:"banner"},c&&r.createElement("div",{className:h}),r.createElement("div",{className:v,dangerouslySetInnerHTML:{__html:a}}),c?r.createElement("button",{type:"button",className:(0,o.Z)("clean-btn close",g),onClick:t,"aria-label":(0,l.I)({id:"theme.AnnouncementBar.closeButtonAriaLabel",message:"Close",description:"The ARIA label for close button of announcement bar"})},r.createElement(p,{width:14,height:14,strokeWidth:3.1})):null)};var y=n(9166),w=n(2389);const k="toggle_iYfV",E="toggleScreenReader_h9qa",S="toggleDisabled_xj38",C="toggleTrack_t-f2",x="toggleTrackCheck_mk7D",A="toggleChecked_a04y",_="toggleTrackX_dm8H",D="toggleTrackThumb_W6To",T="toggleFocused_pRSw",I="toggleIcon_pHJ9",O=(0,r.memo)((e=>{let{className:t,switchConfig:n,checked:a,disabled:i,onChange:l}=e;const{darkIcon:s,darkIconStyle:u,lightIcon:c,lightIconStyle:d}=n,[f,p]=(0,r.useState)(a),[m,h]=(0,r.useState)(!1),g=(0,r.useRef)(null);return 
r.createElement("div",{className:(0,o.Z)(k,t,{[A]:f,[T]:m,[S]:i})},r.createElement("div",{className:C,role:"button",tabIndex:-1,onClick:()=>g.current?.click()},r.createElement("div",{className:x},r.createElement("span",{className:I,style:u},s)),r.createElement("div",{className:_},r.createElement("span",{className:I,style:d},c)),r.createElement("div",{className:D})),r.createElement("input",{ref:g,checked:f,type:"checkbox",className:E,"aria-label":"Switch between dark and light mode",onChange:l,onClick:()=>p(!f),onFocus:()=>h(!0),onBlur:()=>h(!1),onKeyDown:e=>{"Enter"===e.key&&g.current?.click()}}))}));function P(e){const{colorMode:{switchConfig:t}}=(0,s.LU)(),n=(0,w.Z)();return r.createElement(O,(0,f.Z)({switchConfig:t,disabled:!n},e))}var L=n(5350);const R=e=>{const[t,n]=(0,r.useState)(e),o=(0,r.useRef)(!1),a=(0,r.useRef)(0),i=(0,r.useCallback)((e=>{null!==e&&(a.current=e.getBoundingClientRect().height)}),[]);return(0,s.RF)(((t,r)=>{if(!e)return;const i=t.scrollY;if(i=l?n(!1):i+u{if(e)return t.location.hash?(o.current=!0,void n(!1)):void n(!0)})),{navbarRef:i,isNavbarVisible:t}};const N=function(e){void 0===e&&(e=!0),(0,r.useEffect)((()=>(document.body.style.overflow=e?"hidden":"visible",()=>{document.body.style.overflow="visible"})),[e])};var j=n(3783),M=n(907),F=n(2207),B=n(5537);const U=function(e){let{width:t=30,height:n=30,className:o,...a}=e;return r.createElement("svg",(0,f.Z)({className:o,width:t,height:n,viewBox:"0 0 30 30","aria-hidden":"true"},a),r.createElement("path",{stroke:"currentColor",strokeLinecap:"round",strokeMiterlimit:"10",strokeWidth:"2",d:"M4 7h22M4 15h22M4 23h22"}))},z={toggle:"toggle_2i4l",navbarHideable:"navbarHideable_RReh",navbarHidden:"navbarHidden_FBwS",navbarSidebarToggle:"navbarSidebarToggle_AVbO"},$="right";function q(){return(0,s.LU)().navbar.items}function 
G(){const{colorMode:{disableSwitch:e}}=(0,s.LU)(),{isDarkTheme:t,setLightTheme:n,setDarkTheme:o}=(0,L.Z)();return{isDarkTheme:t,toggle:(0,r.useCallback)((e=>e.target.checked?o():n()),[n,o]),disabled:e}}function Z(e){let{sidebarShown:t,toggleSidebar:n}=e;N(t);const a=q(),i=G(),u=function(e){let{sidebarShown:t,toggleSidebar:n}=e;const o=(0,s.g8)()?.({toggleSidebar:n}),a=(0,s.D9)(o),[i,l]=(0,r.useState)((()=>!1));(0,r.useEffect)((()=>{o&&!a&&l(!0)}),[o,a]);const u=!!o;return(0,r.useEffect)((()=>{u?t||l(!0):l(!1)}),[t,u]),{shown:i,hide:(0,r.useCallback)((()=>{l(!1)}),[]),content:o}}({sidebarShown:t,toggleSidebar:n});return r.createElement("div",{className:"navbar-sidebar"},r.createElement("div",{className:"navbar-sidebar__brand"},r.createElement(B.Z,{className:"navbar__brand",imageClassName:"navbar__logo",titleClassName:"navbar__title"}),!i.disabled&&r.createElement(P,{className:z.navbarSidebarToggle,checked:i.isDarkTheme,onChange:i.toggle}),r.createElement("button",{type:"button",className:"clean-btn navbar-sidebar__close",onClick:n},r.createElement(p,{color:"var(--ifm-color-emphasis-600)",className:z.navbarSidebarCloseSvg}))),r.createElement("div",{className:(0,o.Z)("navbar-sidebar__items",{"navbar-sidebar__items--show-secondary":u.shown})},r.createElement("div",{className:"navbar-sidebar__item menu"},r.createElement("ul",{className:"menu__list"},a.map(((e,t)=>r.createElement(F.Z,(0,f.Z)({mobile:!0},e,{onClick:n,key:t})))))),r.createElement("div",{className:"navbar-sidebar__item menu"},a.length>0&&r.createElement("button",{type:"button",className:"clean-btn navbar-sidebar__back",onClick:u.hide},r.createElement(l.Z,{id:"theme.navbar.mobileSidebarSecondaryMenu.backButtonLabel",description:"The label of the back button to return to main menu, inside the mobile navbar sidebar secondary menu (notably used to display the docs sidebar)"},"\u2190 Back to main menu")),u.content)))}const H=function(){const{navbar:{hideOnScroll:e,style:t}}=(0,s.LU)(),n=function(){const 
e=(0,j.Z)(),t="mobile"===e,[n,o]=(0,r.useState)(!1);(0,s.Rb)((()=>{if(n)return o(!1),!1}));const a=(0,r.useCallback)((()=>{o((e=>!e))}),[]);return(0,r.useEffect)((()=>{"desktop"===e&&o(!1)}),[e]),{shouldRender:t,toggle:a,shown:n}}(),a=G(),i=(0,M.gA)(),{navbarRef:l,isNavbarVisible:u}=R(e),c=q(),d=c.some((e=>"search"===e.type)),{leftItems:p,rightItems:m}=function(e){return{leftItems:e.filter((e=>"left"===(e.position??$))),rightItems:e.filter((e=>"right"===(e.position??$)))}}(c);return r.createElement("nav",{ref:l,className:(0,o.Z)("navbar","navbar--fixed-top",{"navbar--dark":"dark"===t,"navbar--primary":"primary"===t,"navbar-sidebar--show":n.shown,[z.navbarHideable]:e,[z.navbarHidden]:e&&!u})},r.createElement("div",{className:"navbar__inner"},r.createElement("div",{className:"navbar__items"},(c?.length>0||i)&&r.createElement("button",{"aria-label":"Navigation bar toggle",className:"navbar__toggle clean-btn",type:"button",tabIndex:0,onClick:n.toggle,onKeyDown:n.toggle},r.createElement(U,null)),r.createElement(B.Z,{className:"navbar__brand",imageClassName:"navbar__logo",titleClassName:"navbar__title"}),p.map(((e,t)=>r.createElement(F.Z,(0,f.Z)({},e,{key:t}))))),r.createElement("div",{className:"navbar__items navbar__items--right"},m.map(((e,t)=>r.createElement(F.Z,(0,f.Z)({},e,{key:t})))),!a.disabled&&r.createElement(P,{className:z.toggle,checked:a.isDarkTheme,onChange:a.toggle}),!d&&r.createElement(y.Z,null))),r.createElement("div",{role:"presentation",className:"navbar-sidebar__backdrop",onClick:n.toggle}),n.shouldRender&&r.createElement(Z,{sidebarShown:n.shown,toggleSidebar:n.toggle}))};var V=n(9960),W=n(4996),K=n(3919);const Y="footerLogoLink_SRtH";var Q=n(9750),X=n(541);function J(e){let{to:t,href:n,label:o,prependBaseUrlToHref:a,...i}=e;const l=(0,W.Z)(t),s=(0,W.Z)(n,{forcePrependBaseUrl:!0});return 
r.createElement(V.Z,(0,f.Z)({className:"footer__link-item"},n?{href:a?s:n}:{to:l},i),n&&!(0,K.Z)(n)?r.createElement("span",null,o,r.createElement(X.Z,null)):o)}function ee(e){let{sources:t,alt:n,width:o,height:a}=e;return r.createElement(Q.Z,{className:"footer__logo",alt:n,sources:t,width:o,height:a})}const te=function(){const{footer:e}=(0,s.LU)(),{copyright:t,links:n=[],logo:a={}}=e||{},i={light:(0,W.Z)(a.src),dark:(0,W.Z)(a.srcDark||a.src)};return e?r.createElement("footer",{className:(0,o.Z)("footer",{"footer--dark":"dark"===e.style})},r.createElement("div",{className:"container"},n&&n.length>0&&r.createElement("div",{className:"row footer__links"},n.map(((e,t)=>r.createElement("div",{key:t,className:"col footer__col"},null!=e.title?r.createElement("div",{className:"footer__title"},e.title):null,null!=e.items&&Array.isArray(e.items)&&e.items.length>0?r.createElement("ul",{className:"footer__items"},e.items.map(((e,t)=>e.html?r.createElement("li",{key:t,className:"footer__item",dangerouslySetInnerHTML:{__html:e.html}}):r.createElement("li",{key:e.href||e.to,className:"footer__item"},r.createElement(J,e))))):null)))),(a||t)&&r.createElement("div",{className:"footer__bottom text--center"},a&&(a.src||a.srcDark)&&r.createElement("div",{className:"margin-bottom--sm"},a.href?r.createElement(V.Z,{href:a.href,className:Y},r.createElement(ee,{alt:a.alt,sources:i,width:a.width,height:a.height})):r.createElement(ee,{alt:a.alt,sources:i})),t?r.createElement("div",{className:"footer__copyright",dangerouslySetInnerHTML:{__html:t}}):null))):null};var ne=n(412);const 
re=(0,s.WA)("theme"),oe="light",ae="dark",ie=e=>e===ae?ae:oe,le=e=>{(0,s.WA)("theme").set(ie(e))},se=()=>{const{colorMode:{defaultMode:e,disableSwitch:t,respectPrefersColorScheme:n}}=(0,s.LU)(),[o,a]=(0,r.useState)((e=>ne.Z.canUseDOM?ie(document.documentElement.getAttribute("data-theme")):ie(e))(e)),i=(0,r.useCallback)((()=>{a(oe),le(oe)}),[]),l=(0,r.useCallback)((()=>{a(ae),le(ae)}),[]);return(0,r.useEffect)((()=>{document.documentElement.setAttribute("data-theme",ie(o))}),[o]),(0,r.useEffect)((()=>{if(!t)try{const e=re.get();null!==e&&a(ie(e))}catch(e){console.error(e)}}),[t,a]),(0,r.useEffect)((()=>{t&&!n||window.matchMedia("(prefers-color-scheme: dark)").addListener((e=>{let{matches:t}=e;a(t?ae:oe)}))}),[t,n]),{isDarkTheme:o===ae,setLightTheme:i,setDarkTheme:l}};var ue=n(2924);const ce=function(e){const{isDarkTheme:t,setLightTheme:n,setDarkTheme:o}=se(),a=(0,r.useMemo)((()=>({isDarkTheme:t,setLightTheme:n,setDarkTheme:o})),[t,n,o]);return r.createElement(ue.Z.Provider,{value:a},e.children)},de="docusaurus.tab.",fe=()=>{const[e,t]=(0,r.useState)({}),n=(0,r.useCallback)(((e,t)=>{(0,s.WA)(`${de}${e}`).set(t)}),[]);return(0,r.useEffect)((()=>{try{const e={};(0,s._f)().forEach((t=>{if(t.startsWith(de)){const n=t.substring(15);e[n]=(0,s.WA)(t).get()}})),t(e)}catch(e){console.error(e)}}),[]),{tabGroupChoices:e,setTabGroupChoices:(e,r)=>{t((t=>({...t,[e]:r}))),n(e,r)}}},pe=(0,r.createContext)(void 0);const me=function(e){const{tabGroupChoices:t,setTabGroupChoices:n}=fe(),o=(0,r.useMemo)((()=>({tabGroupChoices:t,setTabGroupChoices:n})),[t,n]);return r.createElement(pe.Provider,{value:o},e.children)};function he(e){let{children:t}=e;return r.createElement(ce,null,r.createElement(s.pl,null,r.createElement(me,null,r.createElement(s.OC,null,r.createElement(s.L5,null,r.createElement(s.Cn,null,t))))))}var ge=n(2859),ve=n(2263);const be=function(e){let{locale:t,version:n,tag:o}=e;const a=t;return 
r.createElement(ge.Z,null,a&&r.createElement("meta",{name:"docsearch:language",content:a}),n&&r.createElement("meta",{name:"docsearch:version",content:n}),o&&r.createElement("meta",{name:"docsearch:docusaurus_tag",content:o}))};var ye=n(1217);function we(){const{i18n:{defaultLocale:e,locales:t}}=(0,ve.Z)(),n=(0,s.l5)();return r.createElement(ge.Z,null,t.map((e=>r.createElement("link",{key:e,rel:"alternate",href:n.createUrl({locale:e,fullyQualified:!0}),hrefLang:e}))),r.createElement("link",{rel:"alternate",href:n.createUrl({locale:e,fullyQualified:!0}),hrefLang:"x-default"}))}function ke(e){let{permalink:t}=e;const{siteConfig:{url:n}}=(0,ve.Z)(),o=function(){const{siteConfig:{url:e}}=(0,ve.Z)(),{pathname:t}=(0,i.TH)();return e+(0,W.Z)(t)}(),a=t?`${n}${t}`:o;return r.createElement(ge.Z,null,r.createElement("meta",{property:"og:url",content:a}),r.createElement("link",{rel:"canonical",href:a}))}function Ee(e){const{siteConfig:{favicon:t},i18n:{currentLocale:n,localeConfigs:o}}=(0,ve.Z)(),{metadata:a,image:i}=(0,s.LU)(),{title:l,description:u,image:c,keywords:d,searchMetadata:p}=e,m=(0,W.Z)(t),h=(0,s.pe)(l),g=n,v=o[n].direction;return r.createElement(r.Fragment,null,r.createElement(ge.Z,null,r.createElement("html",{lang:g,dir:v}),t&&r.createElement("link",{rel:"icon",href:m}),r.createElement("title",null,h),r.createElement("meta",{property:"og:title",content:h}),r.createElement("meta",{name:"twitter:card",content:"summary_large_image"})),i&&r.createElement(ye.Z,{image:i}),c&&r.createElement(ye.Z,{image:c}),r.createElement(ye.Z,{description:u,keywords:d}),r.createElement(ke,null),r.createElement(we,null),r.createElement(be,(0,f.Z)({tag:s.HX,locale:n},p)),r.createElement(ge.Z,null,a.map(((e,t)=>r.createElement("meta",(0,f.Z)({key:`metadata_${t}`},e))))))}const Se=function(){(0,r.useEffect)((()=>{const e="navigation-with-keyboard";function t(t){"keydown"===t.type&&"Tab"===t.key&&document.body.classList.add(e),"mousedown"===t.type&&document.body.classList.remove(e)}return 
document.addEventListener("keydown",t),document.addEventListener("mousedown",t),()=>{document.body.classList.remove(e),document.removeEventListener("keydown",t),document.removeEventListener("mousedown",t)}}),[])};function Ce(e){let{error:t,tryAgain:n}=e;return r.createElement("main",{className:"container margin-vert--xl"},r.createElement("div",{className:"row"},r.createElement("div",{className:"col col--6 col--offset-3"},r.createElement("h1",{className:"hero__title"},r.createElement(l.Z,{id:"theme.ErrorPageContent.title",description:"The title of the fallback page when the page crashed"},"This page crashed.")),r.createElement("p",null,t.message),r.createElement("div",null,r.createElement("button",{type:"button",onClick:n},r.createElement(l.Z,{id:"theme.ErrorPageContent.tryAgain",description:"The label of the button to try again when the page crashed"},"Try again"))))))}const xe=function(e){const{children:t,noFooter:n,wrapperClassName:i,pageClassName:l}=e;return Se(),r.createElement(he,null,r.createElement(Ee,e),r.createElement(d,null),r.createElement(b,null),r.createElement(H,null),r.createElement("div",{className:(0,o.Z)(s.kM.wrapper.main,i,l)},r.createElement(a.Z,{fallback:Ce},t)),!n&&r.createElement(te,null))}},5537:(e,t,n)=>{"use strict";n.d(t,{Z:()=>c});var r=n(7462),o=n(7294),a=n(9960),i=n(9750),l=n(4996),s=n(2263),u=n(3810);const c=function(e){const{siteConfig:{title:t}}=(0,s.Z)(),{navbar:{title:n,logo:c={src:""}}}=(0,u.LU)(),{imageClassName:d,titleClassName:f,...p}=e,m=(0,l.Z)(c.href||"/"),h={light:(0,l.Z)(c.src),dark:(0,l.Z)(c.srcDark||c.src)},g=o.createElement(i.Z,{sources:h,height:c.height,width:c.width,alt:c.alt||n||t});return o.createElement(a.Z,(0,r.Z)({to:m},p,c.target&&{target:c.target}),c.src&&(d?o.createElement("div",{className:d},g):g),null!=n&&o.createElement("b",{className:f},n))}},5525:(e,t,n)=>{"use strict";n.d(t,{O:()=>p,Z:()=>g});var r=n(7462),o=n(7294),a=n(6010),i=n(9960),l=n(4996),s=n(541),u=n(3919),c=n(3810),d=n(2207);const 
f="dropdown__link--active";function p(e){let{activeBasePath:t,activeBaseRegex:n,to:a,href:d,label:p,activeClassName:m="",prependBaseUrlToHref:h,...g}=e;const v=(0,l.Z)(a),b=(0,l.Z)(t),y=(0,l.Z)(d,{forcePrependBaseUrl:!0}),w=p&&d&&!(0,u.Z)(d),k=m===f;return o.createElement(i.Z,(0,r.Z)({},d?{href:h?y:d}:{isNavLink:!0,activeClassName:g.className?.includes(m)?"":m,to:v,...t||n?{isActive:(e,t)=>n?(0,c.Fx)(n,t.pathname):t.pathname.startsWith(b)}:null},g),w?o.createElement("span",null,p,o.createElement(s.Z,k&&{width:12,height:12})):p)}function m(e){let{className:t,isDropdownItem:n=!1,...i}=e;const l=o.createElement(p,(0,r.Z)({className:(0,a.Z)(n?"dropdown__link":"navbar__item navbar__link",t)},i));return n?o.createElement("li",null,l):l}function h(e){let{className:t,isDropdownItem:n,...i}=e;return o.createElement("li",{className:"menu__list-item"},o.createElement(p,(0,r.Z)({className:(0,a.Z)("menu__link",t)},i)))}const g=function(e){let{mobile:t=!1,position:n,...a}=e;const i=t?h:m;return o.createElement(i,(0,r.Z)({},a,{activeClassName:a.activeClassName??(0,d.E)(t)}))}},6400:(e,t,n)=>{"use strict";n.d(t,{Z:()=>c});var r=n(7462),o=n(7294),a=n(5525),i=n(907),l=n(6010),s=n(2207),u=n(3810);function c(e){let{docId:t,label:n,docsPluginId:c,...d}=e;const{activeVersion:f,activeDoc:p}=(0,i.Iw)(c),{preferredVersion:m}=(0,u.J)(c),h=(0,i.yW)(c),g=function(e,t){const n=e.flatMap((e=>e.docs)),r=n.find((e=>e.id===t));if(!r){const r=n.map((e=>e.id)).join("\n- ");throw new Error(`DocNavbarItem: couldn't find any doc with id "${t}" in version${e.length?"s":""} ${e.map((e=>e.name)).join(", ")}".\nAvailable doc ids are:\n- ${r}`)}return r}((0,u.jj)([f,m,h].filter(Boolean)),t),v=(0,s.E)(d.mobile);return o.createElement(a.Z,(0,r.Z)({exact:!0},d,{className:(0,l.Z)(d.className,{[v]:p?.sidebar&&p.sidebar===g.sidebar}),activeClassName:v,label:n??g.id,to:g.path}))}},9308:(e,t,n)=>{"use strict";n.d(t,{Z:()=>d});var r=n(7462),o=n(7294),a=n(5525),i=n(3154),l=n(907),s=n(3810),u=n(5999);const 
c=e=>e.docs.find((t=>t.id===e.mainDocId));function d(e){let{mobile:t,docsPluginId:n,dropdownActiveClassDisabled:d,dropdownItemsBefore:f,dropdownItemsAfter:p,...m}=e;const h=(0,l.Iw)(n),g=(0,l.gB)(n),v=(0,l.yW)(n),{preferredVersion:b,savePreferredVersionName:y}=(0,s.J)(n);const w=function(){const e=g.map((e=>{const t=h?.alternateDocVersions[e.name]||c(e);return{isNavLink:!0,label:e.label,to:t.path,isActive:()=>e===h?.activeVersion,onClick:()=>{y(e.name)}}}));return[...f,...e,...p]}(),k=h.activeVersion??b??v,E=t&&w?(0,u.I)({id:"theme.navbar.mobileVersionsDropdown.label",message:"Versions",description:"The label for the navbar versions dropdown on mobile view"}):k.label,S=t&&w?void 0:c(k).path;return w.length<=1?o.createElement(a.Z,(0,r.Z)({},m,{mobile:t,label:E,to:S,isActive:d?()=>!1:void 0})):o.createElement(i.Z,(0,r.Z)({},m,{mobile:t,label:E,to:S,items:w,isActive:d?()=>!1:void 0}))}},7250:(e,t,n)=>{"use strict";n.d(t,{Z:()=>u});var r=n(7462),o=n(7294),a=n(5525),i=n(907),l=n(3810);const s=e=>e.docs.find((t=>t.id===e.mainDocId));function u(e){let{label:t,to:n,docsPluginId:u,...c}=e;const d=(0,i.zu)(u),{preferredVersion:f}=(0,l.J)(u),p=(0,i.yW)(u),m=d??f??p,h=t??m.label,g=n??s(m).path;return o.createElement(a.Z,(0,r.Z)({},c,{label:h,to:g}))}},3154:(e,t,n)=>{"use strict";n.d(t,{Z:()=>p});var r=n(7462),o=n(7294),a=n(6010),i=n(3810),l=n(5525),s=n(2207);const u="dropdown__link--active";function c(e,t){return e.some((e=>function(e,t){return!!(0,i.Mg)(e.to,t)||!!(0,i.Fx)(e.activeBaseRegex,t)||!(!e.activeBasePath||!t.startsWith(e.activeBasePath))}(e,t)))}function d(e){let{items:t,position:n,className:i,...c}=e;const d=(0,o.useRef)(null),f=(0,o.useRef)(null),[p,m]=(0,o.useState)(!1);return(0,o.useEffect)((()=>{const e=e=>{d.current&&!d.current.contains(e.target)&&m(!1)};return 
document.addEventListener("mousedown",e),document.addEventListener("touchstart",e),()=>{document.removeEventListener("mousedown",e),document.removeEventListener("touchstart",e)}}),[d]),o.createElement("div",{ref:d,className:(0,a.Z)("navbar__item","dropdown","dropdown--hoverable",{"dropdown--right":"right"===n,"dropdown--show":p})},o.createElement(l.O,(0,r.Z)({href:c.to?void 0:"#",className:(0,a.Z)("navbar__link",i)},c,{onClick:c.to?void 0:e=>e.preventDefault(),onKeyDown:e=>{"Enter"===e.key&&(e.preventDefault(),m(!p))}}),c.children??c.label),o.createElement("ul",{ref:f,className:"dropdown__menu"},t.map(((e,n)=>o.createElement(s.Z,(0,r.Z)({isDropdownItem:!0,onKeyDown:e=>{if(n===t.length-1&&"Tab"===e.key){e.preventDefault(),m(!1);const t=d.current.nextElementSibling;t&&t.focus()}},activeClassName:u},e,{key:n}))))))}function f(e){let{items:t,className:n,position:u,...d}=e;const f=(0,i.be)(),p=c(t,f),{collapsed:m,toggleCollapsed:h,setCollapsed:g}=(0,i.uR)({initialState:()=>!p});return(0,o.useEffect)((()=>{p&&g(!p)}),[f,p,g]),o.createElement("li",{className:(0,a.Z)("menu__list-item",{"menu__list-item--collapsed":m})},o.createElement(l.O,(0,r.Z)({role:"button",className:(0,a.Z)("menu__link menu__link--sublist",n)},d,{onClick:e=>{e.preventDefault(),h()}}),d.children??d.label),o.createElement(i.zF,{lazy:!0,as:"ul",className:"menu__list",collapsed:m},t.map(((e,t)=>o.createElement(s.Z,(0,r.Z)({mobile:!0,isDropdownItem:!0,onClick:d.onClick,activeClassName:"menu__link--active"},e,{key:t}))))))}const p=function(e){let{mobile:t=!1,...n}=e;const r=t?f:d;return o.createElement(r,n)}},2207:(e,t,n)=>{"use strict";n.d(t,{Z:()=>v,E:()=>g});var r=n(7294),o=n(5525),a=n(3154),i=n(7462);const l=function(e){let{width:t=20,height:n=20,...o}=e;return r.createElement("svg",(0,i.Z)({viewBox:"0 0 20 20",width:t,height:n,"aria-hidden":"true"},o),r.createElement("path",{fill:"currentColor",d:"M19.753 10.909c-.624-1.707-2.366-2.726-4.661-2.726-.09 0-.176.002-.262.006l-.016-2.063 
3.525-.607c.115-.019.133-.119.109-.231-.023-.111-.167-.883-.188-.976-.027-.131-.102-.127-.207-.109-.104.018-3.25.461-3.25.461l-.013-2.078c-.001-.125-.069-.158-.194-.156l-1.025.016c-.105.002-.164.049-.162.148l.033 2.307s-3.061.527-3.144.543c-.084.014-.17.053-.151.143.019.09.19 1.094.208 1.172.018.08.072.129.188.107l2.924-.504.035 2.018c-1.077.281-1.801.824-2.256 1.303-.768.807-1.207 1.887-1.207 2.963 0 1.586.971 2.529 2.328 2.695 3.162.387 5.119-3.06 5.769-4.715 1.097 1.506.256 4.354-2.094 5.98-.043.029-.098.129-.033.207l.619.756c.08.096.206.059.256.023 2.51-1.73 3.661-4.515 2.869-6.683zm-7.386 3.188c-.966-.121-.944-.914-.944-1.453 0-.773.327-1.58.876-2.156a3.21 3.21 0 011.229-.799l.082 4.277a2.773 2.773 0 01-1.243.131zm2.427-.553l.046-4.109c.084-.004.166-.01.252-.01.773 0 1.494.145 1.885.361.391.217-1.023 2.713-2.183 3.758zm-8.95-7.668a.196.196 0 00-.196-.145h-1.95a.194.194 0 00-.194.144L.008 16.916c-.017.051-.011.076.062.076h1.733c.075 0 .099-.023.114-.072l1.008-3.318h3.496l1.008 3.318c.016.049.039.072.113.072h1.734c.072 0 .078-.025.062-.076-.014-.05-3.083-9.741-3.494-11.04zm-2.618 6.318l1.447-5.25 1.447 5.25H3.226z"}))};var s=n(2263),u=n(3810);const c="iconLanguage_EbrZ";function d(e){let{mobile:t,dropdownItemsBefore:n,dropdownItemsAfter:o,...d}=e;const{i18n:{currentLocale:f,locales:p,localeConfigs:m}}=(0,s.Z)(),h=(0,u.l5)();function g(e){return m[e].label}const v=[...n,...p.map((e=>{const t=`pathname://${h.createUrl({locale:e,fullyQualified:!1})}`;return{isNavLink:!0,label:g(e),to:t,target:"_self",autoAddBaseUrl:!1,className:e===f?"dropdown__link--active":""}})),...o],b=t?"Languages":g(f);return r.createElement(a.Z,(0,i.Z)({},d,{mobile:t,label:r.createElement("span",null,r.createElement(l,{className:c}),r.createElement("span",null,b)),items:v}))}var f=n(9166);function p(e){let{mobile:t}=e;return t?null:r.createElement(f.Z,null)}const 
m={default:()=>o.Z,localeDropdown:()=>d,search:()=>p,dropdown:()=>a.Z,docsVersion:()=>n(7250).Z,docsVersionDropdown:()=>n(9308).Z,doc:()=>n(6400).Z},h=e=>{const t=m[e];if(!t)throw new Error(`No NavbarItem component found for type "${e}".`);return t()};const g=e=>e?"menu__link--active":"navbar__link--active";function v(e){let{type:t,...n}=e;const o=function(e,t){return e&&"default"!==e?e:t?"dropdown":"default"}(t,void 0!==n.items),a=h(o);return r.createElement(a,n)}},1217:(e,t,n)=>{"use strict";n.d(t,{Z:()=>l});var r=n(7294),o=n(2859),a=n(3810),i=n(4996);function l(e){let{title:t,description:n,keywords:l,image:s,children:u}=e;const c=(0,a.pe)(t),{withBaseUrl:d}=(0,i.C)(),f=s?d(s,{absolute:!0}):void 0;return r.createElement(o.Z,null,t&&r.createElement("title",null,c),t&&r.createElement("meta",{property:"og:title",content:c}),n&&r.createElement("meta",{name:"description",content:n}),n&&r.createElement("meta",{property:"og:description",content:n}),l&&r.createElement("meta",{name:"keywords",content:Array.isArray(l)?l.join(","):l}),f&&r.createElement("meta",{property:"og:image",content:f}),f&&r.createElement("meta",{name:"twitter:image",content:f}),u)}},2924:(e,t,n)=>{"use strict";n.d(t,{Z:()=>r});const r=n(7294).createContext(void 0)},9750:(e,t,n)=>{"use strict";n.d(t,{Z:()=>u});var r=n(7462),o=n(7294),a=n(6010),i=n(2389),l=n(5350);const s={themedImage:"themedImage_TMUO","themedImage--light":"themedImage--light_4Vu1","themedImage--dark":"themedImage--dark_uzRr"};const u=function(e){const t=(0,i.Z)(),{isDarkTheme:n}=(0,l.Z)(),{sources:u,className:c,alt:d="",...f}=e,p=t?n?["dark"]:["light"]:["light","dark"];return o.createElement(o.Fragment,null,p.map((e=>o.createElement("img",(0,r.Z)({key:e,src:u[e],alt:d,className:(0,a.Z)(s.themedImage,s[`themedImage--${e}`],c)},f)))))}},907:(e,t,n)=>{"use strict";n.d(t,{Iw:()=>r.Iw,Jo:()=>r.Jo,WS:()=>r.WS,_r:()=>r._r,gA:()=>r.gA,gB:()=>r.gB,yW:()=>r.yW,zh:()=>r.zh,zu:()=>r.zu});var r=n(6730)},5350:(e,t,n)=>{"use 
strict";n.d(t,{Z:()=>a});var r=n(7294),o=n(2924);const a=function(){const e=(0,r.useContext)(o.Z);if(null==e)throw new Error('"useThemeContext" is used outside of "Layout" component. Please see https://docusaurus.io/docs/api/themes/configuration#usethemecontext.');return e}},3783:(e,t,n)=>{"use strict";n.d(t,{Z:()=>s});var r=n(7294),o=n(412);const a={desktop:"desktop",mobile:"mobile",ssr:"ssr"},i=996;function l(){return o.Z.canUseDOM?window.innerWidth>i?a.desktop:a.mobile:a.ssr}const s=function(){const[e,t]=(0,r.useState)((()=>l()));return(0,r.useEffect)((()=>{function e(){t(l())}return window.addEventListener("resize",e),()=>{window.removeEventListener("resize",e),clearTimeout(undefined)}}),[]),e}},467:(e,t,n)=>{"use strict";n.r(t),n.d(t,{default:()=>a});var r=n(412),o=n(9782);const a=e=>{if(r.Z.canUseDOM){const{themeConfig:{prism:t={}}}=o.default,{additionalLanguages:r=[]}=t;window.Prism=e,r.forEach((e=>{n(6726)(`./prism-${e}`)})),delete window.Prism}}},2448:(e,t,n)=>{"use strict";var r=o(n(7410));function o(e){return e&&e.__esModule?e:{default:e}}(0,o(n(467)).default)(r.default)},3810:(e,t,n)=>{"use strict";n.d(t,{pl:()=>Ue,zF:()=>be,HX:()=>M,PO:()=>Ae,L5:()=>O,bT:()=>E,qu:()=>y,Cv:()=>Oe,Cn:()=>De,OC:()=>Xe,kM:()=>Le,WA:()=>u,os:()=>F,Wl:()=>C,_F:()=>x,Fx:()=>nt,Mg:()=>h,_f:()=>c,bc:()=>K,Vo:()=>Y,nZ:()=>Q,jj:()=>Pe,l5:()=>f,nT:()=>ze,uR:()=>ce,_q:()=>B,J:()=>N,Vq:()=>S,E6:()=>w,ed:()=>ae,Rb:()=>qe,be:()=>$e,SL:()=>le,g8:()=>Ie,c2:()=>re,D9:()=>ie,RF:()=>tt,DA:()=>Ke,Si:()=>Ve,LU:()=>o,pe:()=>X});var r=n(2263);function o(){return(0,r.Z)().siteConfig.themeConfig}const a="localStorage";function i(e){if(void 0===e&&(e=a),"undefined"==typeof window)throw new Error("Browser storage is not available on Node.js/Docusaurus SSR process.");if("none"===e)return null;try{return window[e]}catch(n){return t=n,l||(console.warn("Docusaurus browser storage is not available.\nPossible reasons: running Docusaurus in an iframe, in an incognito browser session, or using too strict 
browser privacy settings.",t),l=!0),null}var t}let l=!1;const s={get:()=>null,set:()=>{},del:()=>{}};const u=(e,t)=>{if("undefined"==typeof window)return function(e){function t(){throw new Error(`Illegal storage API usage for storage key "${e}".\nDocusaurus storage APIs are not supposed to be called on the server-rendering process.\nPlease only call storage APIs in effects and event handlers.`)}return{get:t,set:t,del:t}}(e);const n=i(null==t?void 0:t.persistence);return null===n?s:{get:()=>{try{return n.getItem(e)}catch(t){return console.error(`Docusaurus storage error, can't get key=${e}`,t),null}},set:t=>{try{n.setItem(e,t)}catch(r){console.error(`Docusaurus storage error, can't set ${e}=${t}`,r)}},del:()=>{try{n.removeItem(e)}catch(t){console.error(`Docusaurus storage error, can't delete key=${e}`,t)}}}};function c(e){void 0===e&&(e=a);const t=i(e);if(!t)return[];const n=[];for(let r=0;r{const n=e=>!e||(null==e?void 0:e.endsWith("/"))?e:`${e}/`;return n(e)===n(t)},g=!!p._r,v=Symbol("EmptyContext"),b=(0,m.createContext)(v);function y(e){let{children:t,version:n}=e;return m.createElement(b.Provider,{value:n},t)}function w(){const e=(0,m.useContext)(b);if(e===v)throw new Error("This hook requires usage of ");return e}const k=(0,m.createContext)(v);function E(e){let{children:t,sidebar:n}=e;return m.createElement(k.Provider,{value:n},t)}function S(){const e=(0,m.useContext)(k);if(e===v)throw new Error("This hook requires usage of ");return e}function C(e){if(e.href)return e.href;for(const t of e.items){if("link"===t.type)return t.href;if("category"!==t.type)throw new Error(`Unexpected category item type for ${JSON.stringify(t)}`);{const e=C(t);if(e)return e}}}function x(e,t){const n=e=>void 0!==e&&h(e,t);return"link"===e.type?n(e.href):"category"===e.type&&(n(e.href)||function(e,t){return e.some((e=>x(e,t)))}(e.items,t))}const 
A=e=>`docs-preferred-version-${e}`,_={save:(e,t,n)=>{u(A(e),{persistence:t}).set(n)},read:(e,t)=>u(A(e),{persistence:t}).get(),clear:(e,t)=>{u(A(e),{persistence:t}).del()}};function D(e){let{pluginIds:t,versionPersistence:n,allDocsData:r}=e;const o={};return t.forEach((e=>{o[e]=function(e){const t=_.read(e,n);return r[e].versions.some((e=>e.name===t))?{preferredVersionName:t}:(_.clear(e,n),{preferredVersionName:null})}(e)})),o}function T(){const e=(0,p._r)(),t=o().docs.versionPersistence,n=(0,m.useMemo)((()=>Object.keys(e)),[e]),[r,a]=(0,m.useState)((()=>function(e){const t={};return e.forEach((e=>{t[e]={preferredVersionName:null}})),t}(n)));(0,m.useEffect)((()=>{a(D({allDocsData:e,versionPersistence:t,pluginIds:n}))}),[e,t,n]);return[r,(0,m.useMemo)((()=>({savePreferredVersion:function(e,n){_.save(e,t,n),a((t=>({...t,[e]:{preferredVersionName:n}})))}})),[t])]}const I=(0,m.createContext)(null);function O(e){let{children:t}=e;return g?m.createElement(P,null,t):t}function P(e){let{children:t}=e;const n=T();return m.createElement(I.Provider,{value:n},t)}function L(){const e=(0,m.useContext)(I);if(!e)throw new Error('Can\'t find docs preferred context, maybe you forgot to use the "DocsPreferredVersionContextProvider"?');return e}var R=n(9935);function N(e){void 0===e&&(e=R.m);const t=(0,p.zh)(e),[n,r]=L(),{preferredVersionName:o}=n[e];return{preferredVersion:o?t.versions.find((e=>e.name===o)):null,savePreferredVersionName:(0,m.useCallback)((t=>{r.savePreferredVersion(e,t)}),[r,e])}}function j(){const e=(0,p._r)(),[t]=L();const n=Object.keys(e),r={};return n.forEach((n=>{r[n]=function(n){const r=e[n],{preferredVersionName:o}=t[n];return o?r.versions.find((e=>e.name===o)):null}(n)})),r}const M="default";function F(e,t){return`docs-${e}-${t}`}function B(){const{i18n:e}=(0,r.Z)(),t=(0,p._r)(),n=(0,p.WS)(),o=j();const a=[M,...Object.keys(t).map((function(e){var r,a;const i=(null===(r=null==n?void 0:n.activePlugin)||void 0===r?void 0:r.pluginId)===e?n.activeVersion:void 
0,l=o[e],s=t[e].versions.find((e=>e.isLast));return F(e,(null!==(a=null!=i?i:l)&&void 0!==a?a:s).name)}))];return{locale:e.currentLocale,tags:a}}var U=n(7594),z=n.n(U);const $=/title=(["'])(.*?)\1/,q=/{([\d,-]+)}/,G=["js","jsBlock","jsx","python","html"],Z={js:{start:"\\/\\/",end:""},jsBlock:{start:"\\/\\*",end:"\\*\\/"},jsx:{start:"\\{\\s*\\/\\*",end:"\\*\\/\\s*\\}"},python:{start:"#",end:""},html:{start:"\x3c!--",end:"--\x3e"}},H=["highlight-next-line","highlight-start","highlight-end"],V=function(e){void 0===e&&(e=G);const t=e.map((e=>{const{start:t,end:n}=Z[e];return`(?:${t}\\s*(${H.join("|")})\\s*${n})`})).join("|");return new RegExp(`^\\s*(?:${t})\\s*$`)},W=e=>{switch(e){case"js":case"javascript":case"ts":case"typescript":return V(["js","jsBlock"]);case"jsx":case"tsx":return V(["js","jsBlock","jsx"]);case"html":return V(["js","jsBlock","html"]);case"python":case"py":return V(["python"]);default:return V()}};function K(e){var t,n;return null!==(n=null===(t=null==e?void 0:e.match($))||void 0===t?void 0:t[2])&&void 0!==n?n:""}function Y(e){const t=null==e?void 0:e.split(" ").find((e=>e.startsWith("language-")));return null==t?void 0:t.replace(/language-/,"")}function Q(e,t,n){let r=e.replace(/\n$/,"");if(t&&q.test(t)){const e=t.match(q)[1];return{highlightLines:z()(e).filter((e=>e>0)).map((e=>e-1)),code:r}}if(void 0===n)return{highlightLines:[],code:r};const o=W(n),a=r.split("\n");let i,l="";for(let u=0;uvoid 0!==e))){case"highlight-next-line":l+=`${u},`;break;case"highlight-start":i=u;break;case"highlight-end":l+=`${i}-${u-1},`}a.splice(u,1)}else u+=1}const s=z()(l);return r=a.join("\n"),{highlightLines:s,code:r}}const X=e=>{const{siteConfig:t}=(0,r.Z)(),{title:n,titleDelimiter:o}=t;return e&&e.trim().length?`${e.trim()} ${o} ${n}`:n},J=["zero","one","two","few","many","other"];function ee(e){return J.filter((t=>e.includes(t)))}const te={locale:"en",pluralForms:ee(["one","other"]),select:e=>1===e?"one":"other"};function 
ne(){const{i18n:{currentLocale:e}}=(0,r.Z)();return(0,m.useMemo)((()=>{if(!Intl.PluralRules)return console.error("Intl.PluralRules not available!\nDocusaurus will fallback to a default/fallback (English) Intl.PluralRules implementation.\n "),te;try{return function(e){const t=new Intl.PluralRules(e);return{locale:e,pluralForms:ee(t.resolvedOptions().pluralCategories),select:e=>t.select(e)}}(e)}catch(t){return console.error(`Failed to use Intl.PluralRules for locale "${e}".\nDocusaurus will fallback to a default/fallback (English) Intl.PluralRules implementation.\n`),te}}),[e])}function re(){const e=ne();return{selectMessage:(t,n)=>function(e,t,n){const r=e.split("|");if(1===r.length)return r[0];{r.length>n.pluralForms.length&&console.error(`For locale=${n.locale}, a maximum of ${n.pluralForms.length} plural forms are expected (${n.pluralForms}), but the message contains ${r.length} plural forms: ${e} `);const o=n.select(t),a=n.pluralForms.indexOf(o);return r[Math.min(a,r.length-1)]}}(n,t,e)}}const oe="undefined"!=typeof window?m.useLayoutEffect:m.useEffect;function ae(e){const t=(0,m.useRef)(e);return oe((()=>{t.current=e}),[e]),(0,m.useCallback)((function(){return t.current(...arguments)}),[])}function ie(e){const t=(0,m.useRef)();return oe((()=>{t.current=e})),t.current}function le(e){const t=(0,d.TH)(),n=ie(t),r=ae(e);(0,m.useEffect)((()=>{t!==n&&r({location:t,previousLocation:n})}),[r,t,n])}var se=n(412);const ue="ease-in-out";function ce(e){let{initialState:t}=e;const[n,r]=(0,m.useState)(null!=t&&t),o=(0,m.useCallback)((()=>{r((e=>!e))}),[]);return{collapsed:n,setCollapsed:r,toggleCollapsed:o}}const de={display:"none",overflow:"hidden",height:"0px"},fe={display:"block",overflow:"visible",height:"auto"};function pe(e,t){const n=t?de:fe;e.style.display=n.display,e.style.overflow=n.overflow,e.style.height=n.height}function me(e){let{collapsibleRef:t,collapsed:n,animation:r}=e;const o=(0,m.useRef)(!1);(0,m.useEffect)((()=>{const e=t.current;function a(){var 
t,n;const o=e.scrollHeight,a=null!==(t=null==r?void 0:r.duration)&&void 0!==t?t:function(e){const t=e/36;return Math.round(10*(4+15*t**.25+t/5))}(o);return{transition:`height ${a}ms ${null!==(n=null==r?void 0:r.easing)&&void 0!==n?n:ue}`,height:`${o}px`}}function i(){const t=a();e.style.transition=t.transition,e.style.height=t.height}if(!o.current)return pe(e,n),void(o.current=!0);return e.style.willChange="height",function(){const t=requestAnimationFrame((()=>{n?(i(),requestAnimationFrame((()=>{e.style.height=de.height,e.style.overflow=de.overflow}))):(e.style.display="block",requestAnimationFrame((()=>{i()})))}));return()=>cancelAnimationFrame(t)}()}),[t,n,r])}function he(e){if(!se.Z.canUseDOM)return e?de:fe}function ge(e){let{as:t="div",collapsed:n,children:r,animation:o,onCollapseTransitionEnd:a,className:i,disableSSRStyle:l}=e;const s=(0,m.useRef)(null);return me({collapsibleRef:s,collapsed:n,animation:o}),m.createElement(t,{ref:s,style:l?void 0:he(n),onTransitionEnd:e=>{"height"===e.propertyName&&(pe(s.current,n),null==a||a(n))},className:i},r)}function ve(e){let{collapsed:t,...n}=e;const[r,o]=(0,m.useState)(!t);(0,m.useLayoutEffect)((()=>{t||o(!0)}),[t]);const[a,i]=(0,m.useState)(t);return(0,m.useLayoutEffect)((()=>{r&&i(t)}),[r,t]),r?m.createElement(ge,{...n,collapsed:a}):null}function be(e){let{lazy:t,...n}=e;const r=t?ve:ge;return m.createElement(r,{...n})}var ye=n(2389),we=n(6010);const ke="details_Q743",Ee="isBrowser_rWTL",Se="collapsibleContent_K5uX";function Ce(e){return!!e&&("SUMMARY"===e.tagName||Ce(e.parentElement))}function xe(e,t){return!!e&&(e===t||xe(e.parentElement,t))}const Ae=function(e){let{summary:t,children:n,...r}=e;const o=(0,ye.Z)(),a=(0,m.useRef)(null),{collapsed:i,setCollapsed:l}=ce({initialState:!r.open}),[s,u]=(0,m.useState)(r.open);return 
m.createElement("details",{...r,ref:a,open:s,"data-collapsed":i,className:(0,we.Z)(ke,{[Ee]:o},r.className),onMouseDown:e=>{Ce(e.target)&&e.detail>1&&e.preventDefault()},onClick:e=>{e.stopPropagation();const t=e.target;Ce(t)&&xe(t,a.current)&&(e.preventDefault(),i?(l(!1),u(!0)):l(!0))}},t,m.createElement(be,{lazy:!1,collapsed:i,disableSSRStyle:!0,onCollapseTransitionEnd:e=>{l(e),u(!e)}},m.createElement("div",{className:Se},n)))};const _e=(0,m.createContext)(null);function De(e){let{children:t}=e;return m.createElement(_e.Provider,{value:(0,m.useState)(null)},t)}function Te(){const e=(0,m.useContext)(_e);if(null===e)throw new Error("MobileSecondaryMenuProvider was not used correctly, context value is null");return e}function Ie(){const[e]=Te();if(e){const t=e.component;return function(n){return m.createElement(t,{...e.props,...n})}}return()=>{}}function Oe(e){let{component:t,props:n}=e;const[,r]=Te(),o=(a=n,(0,m.useMemo)((()=>a),[...Object.keys(a),...Object.values(a)]));var a;return(0,m.useEffect)((()=>{r({component:t,props:o})}),[r,t,o]),(0,m.useEffect)((()=>()=>r(null)),[r]),null}function Pe(e){return Array.from(new Set(e))}const 
Le={page:{blogListPage:"blog-list-page",blogPostPage:"blog-post-page",blogTagsListPage:"blog-tags-list-page",blogTagPostListPage:"blog-tags-post-list-page",docsDocPage:"docs-doc-page",docsTagsListPage:"docs-tags-list-page",docsTagDocListPage:"docs-tags-doc-list-page",mdxPage:"mdx-page"},wrapper:{main:"main-wrapper",blogPages:"blog-wrapper",docsPages:"docs-wrapper",mdxPages:"mdx-wrapper"},common:{editThisPage:"theme-edit-this-page",lastUpdated:"theme-last-updated",backToTopButton:"theme-back-to-top-button"},layout:{},docs:{docVersionBanner:"theme-doc-version-banner",docVersionBadge:"theme-doc-version-badge",docMarkdown:"theme-doc-markdown",docTocMobile:"theme-doc-toc-mobile",docTocDesktop:"theme-doc-toc-desktop",docFooter:"theme-doc-footer",docFooterTagsRow:"theme-doc-footer-tags-row",docFooterEditMetaRow:"theme-doc-footer-edit-meta-row",docSidebarMenu:"theme-doc-sidebar-menu",docSidebarItemCategory:"theme-doc-sidebar-item-category",docSidebarItemLink:"theme-doc-sidebar-item-link",docSidebarItemCategoryLevel:e=>`theme-doc-sidebar-item-category-level-${e}`,docSidebarItemLinkLevel:e=>`theme-doc-sidebar-item-link-level-${e}`},blog:{}},Re=u("docusaurus.announcement.dismiss"),Ne=u("docusaurus.announcement.id"),je=()=>"true"===Re.get(),Me=e=>Re.set(String(e)),Fe=()=>{const{announcementBar:e}=o(),t=(0,ye.Z)(),[n,r]=(0,m.useState)((()=>!!t&&je()));(0,m.useEffect)((()=>{r(je())}),[]);const a=(0,m.useCallback)((()=>{Me(!0),r(!0)}),[]);return(0,m.useEffect)((()=>{if(!e)return;const{id:t}=e;let n=Ne.get();"annoucement-bar"===n&&(n="announcement-bar");const o=t!==n;Ne.set(t),o&&Me(!1),!o&&je()||r(!1)}),[e]),(0,m.useMemo)((()=>({isActive:!!e&&!n,close:a})),[e,n,a])},Be=(0,m.createContext)(null);function Ue(e){let{children:t}=e;const n=Fe();return m.createElement(Be.Provider,{value:n},t)}const ze=()=>{const e=(0,m.useContext)(Be);if(!e)throw new Error("useAnnouncementBar(): AnnouncementBar not found in React context: make sure to use the AnnouncementBarProvider on top of the 
tree");return e};function $e(){const{siteConfig:{baseUrl:e}}=(0,r.Z)(),{pathname:t}=(0,d.TH)();return t.replace(e,"/")}n(5999);function qe(e){!function(e){const{block:t}=(0,d.k6)(),n=(0,m.useRef)(e);(0,m.useEffect)((()=>{n.current=e}),[e]),(0,m.useEffect)((()=>t(((e,t)=>n.current(e,t)))),[t,n])}(((t,n)=>{if("POP"===n)return e(t,n)}))}function Ge(e){const t=e.getBoundingClientRect();return t.top===t.bottom?Ge(e.parentNode):t}function Ze(e,t){let{anchorTopOffset:n}=t;var r;const o=e.find((e=>Ge(e).top>=n));if(o){return function(e){return e.top>0&&e.bottom{e.current=t?0:document.querySelector(".navbar").clientHeight}),[t]),e}const Ve=function(e){const t=(0,m.useRef)(void 0),n=He();(0,m.useEffect)((()=>{if(!e)return()=>{};const{linkClassName:r,linkActiveClassName:o,minHeadingLevel:a,maxHeadingLevel:i}=e;function l(){const e=function(e){return Array.from(document.getElementsByClassName(e))}(r),l=function(e){let{minHeadingLevel:t,maxHeadingLevel:n}=e;const r=[];for(let o=t;o<=n;o+=1)r.push(`h${o}.anchor`);return Array.from(document.querySelectorAll(r.join()))}({minHeadingLevel:a,maxHeadingLevel:i}),s=Ze(l,{anchorTopOffset:n.current}),u=e.find((e=>s&&s.id===function(e){return decodeURIComponent(e.href.substring(e.href.indexOf("#")+1))}(e)));e.forEach((e=>{!function(e,n){var r;n?(t.current&&t.current!==e&&(null===(r=t.current)||void 0===r||r.classList.remove(o)),e.classList.add(o),t.current=e):e.classList.remove(o)}(e,e===u)}))}return document.addEventListener("scroll",l),document.addEventListener("resize",l),l(),()=>{document.removeEventListener("scroll",l),document.removeEventListener("resize",l)}}),[e,n])};function We(e){let{toc:t,minHeadingLevel:n,maxHeadingLevel:r}=e;return t.flatMap((e=>{const t=We({toc:e.children,minHeadingLevel:n,maxHeadingLevel:r});return function(e){return e.level>=n&&e.level<=r}(e)?[{...e,children:t}]:t}))}function 
Ke(e){let{toc:t,minHeadingLevel:n,maxHeadingLevel:r}=e;return(0,m.useMemo)((()=>We({toc:t,minHeadingLevel:n,maxHeadingLevel:r})),[t,n,r])}function Ye(){const e=(0,m.useRef)(!0);return(0,m.useMemo)((()=>({scrollEventsEnabledRef:e,enableScrollEvents:()=>{e.current=!0},disableScrollEvents:()=>{e.current=!1}})),[])}const Qe=(0,m.createContext)(void 0);function Xe(e){let{children:t}=e;return m.createElement(Qe.Provider,{value:Ye()},t)}function Je(){const e=(0,m.useContext)(Qe);if(null==e)throw new Error('"useScrollController" is used but no context provider was found in the React tree.');return e}const et=()=>se.Z.canUseDOM?{scrollX:window.pageXOffset,scrollY:window.pageYOffset}:null;function tt(e,t){void 0===t&&(t=[]);const{scrollEventsEnabledRef:n}=Je(),r=(0,m.useRef)(et()),o=ae(e);(0,m.useEffect)((()=>{const e=()=>{if(!n.current)return;const e=et();o&&o(e,r.current),r.current=e},t={passive:!0};return e(),window.addEventListener("scroll",e,t),()=>window.removeEventListener("scroll",e,t)}),[o,n,...t])}function nt(e,t){return void 0!==e&&void 0!==t&&new RegExp(e,"gi").test(t)}},9166:(e,t,n)=>{"use strict";n.d(t,{Z:()=>D});var r=n(7462),o=n(7294),a=n(3935),i=n(2263),l=n(6550),s=n(4996),u=n(9960),c=n(2859),d=n(9565),f=n(3810);function p(){return o.createElement("svg",{width:"15",height:"15",className:"DocSearch-Control-Key-Icon"},o.createElement("path",{d:"M4.505 4.496h2M5.505 5.496v5M8.216 4.496l.055 5.993M10 7.5c.333.333.5.667.5 1v2M12.326 4.5v5.996M8.384 4.496c1.674 0 2.116 0 2.116 1.5s-.442 1.5-2.116 1.5M3.205 9.303c-.09.448-.277 1.21-1.241 1.203C1 10.5.5 9.513.5 8V7c0-1.57.5-2.5 1.464-2.494.964.006 1.134.598 1.24 1.342M12.553 10.5h1.953",strokeWidth:"1.2",stroke:"currentColor",fill:"none",strokeLinecap:"square"}))}var m=n(830),h=["translations"];function g(){return g=Object.assign||function(e){for(var t=1;te.length)&&(t=e.length);for(var n=0,r=new Array(t);n=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var 
a=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var w="Ctrl";var k=o.forwardRef((function(e,t){var n=e.translations,r=void 0===n?{}:n,a=y(e,h),i=r.buttonText,l=void 0===i?"Search":i,s=r.buttonAriaLabel,u=void 0===s?"Search":s,c=v((0,o.useState)(null),2),d=c[0],f=c[1];return(0,o.useEffect)((function(){"undefined"!=typeof navigator&&(/(Mac|iPhone|iPod|iPad)/i.test(navigator.platform)?f("\u2318"):f(w))}),[]),o.createElement("button",g({type:"button",className:"DocSearch DocSearch-Button","aria-label":u},a,{ref:t}),o.createElement("span",{className:"DocSearch-Button-Container"},o.createElement(m.W,null),o.createElement("span",{className:"DocSearch-Button-Placeholder"},l)),o.createElement("span",{className:"DocSearch-Button-Keys"},null!==d&&o.createElement(o.Fragment,null,o.createElement("kbd",{className:"DocSearch-Button-Key"},d===w?o.createElement(p,null):d),o.createElement("kbd",{className:"DocSearch-Button-Key"},"K"))))}));var E=n(5999);const S={searchBox:"searchBox_Utm0"};let C=null;function x(e){let{hit:t,children:n}=e;return o.createElement(u.Z,{to:t.url},n)}function A(e){let{state:t,onClose:n}=e;const{generateSearchPageLink:r}=(0,d.Z)();return o.createElement(u.Z,{to:r(t.query),onClick:n},"See all ",t.context.nbHits," results")}function _(e){let{contextualSearch:t,externalUrlRegex:u,...d}=e;var p,m;const{siteMetadata:h}=(0,i.Z)(),g=function(){const{locale:e,tags:t}=(0,f._q)();return[`language:${e}`,t.map((e=>`docusaurus_tag:${e}`))]}(),v=null!==(m=null===(p=d.searchParameters)||void 0===p?void 0:p.facetFilters)&&void 0!==m?m:[],b=t?[...g,...v]:v,y={...d.searchParameters,facetFilters:b},{withBaseUrl:w}=(0,s.C)(),_=(0,l.k6)(),D=(0,o.useRef)(null),T=(0,o.useRef)(null),[I,O]=(0,o.useState)(!1),[P,L]=(0,o.useState)(void 
0),R=(0,o.useCallback)((()=>C?Promise.resolve():Promise.all([n.e(1426).then(n.bind(n,1426)),Promise.all([n.e(532),n.e(6945)]).then(n.bind(n,6945)),Promise.all([n.e(532),n.e(8894)]).then(n.bind(n,8894))]).then((e=>{let[{DocSearchModal:t}]=e;C=t}))),[]),N=(0,o.useCallback)((()=>{R().then((()=>{D.current=document.createElement("div"),document.body.insertBefore(D.current,document.body.firstChild),O(!0)}))}),[R,O]),j=(0,o.useCallback)((()=>{var e;O(!1),null===(e=D.current)||void 0===e||e.remove()}),[O]),M=(0,o.useCallback)((e=>{R().then((()=>{O(!0),L(e.key)}))}),[R,O,L]),F=(0,o.useRef)({navigate(e){let{itemUrl:t}=e;(0,f.Fx)(u,t)?window.location.href=t:_.push(t)}}).current,B=(0,o.useRef)((e=>e.map((e=>{if((0,f.Fx)(u,e.url))return e;const t=new URL(e.url);return{...e,url:w(`${t.pathname}${t.hash}`)}})))).current,U=(0,o.useMemo)((()=>e=>o.createElement(A,(0,r.Z)({},e,{onClose:j}))),[j]),z=(0,o.useCallback)((e=>(e.addAlgoliaAgent("docusaurus",h.docusaurusVersion),e)),[h.docusaurusVersion]);!function(e){var t=e.isOpen,n=e.onOpen,r=e.onClose,a=e.onInput,i=e.searchButtonRef;o.useEffect((function(){function e(e){var o;(27===e.keyCode&&t||"k"===(null===(o=e.key)||void 0===o?void 0:o.toLowerCase())&&(e.metaKey||e.ctrlKey)||!function(e){var t=e.target,n=t.tagName;return t.isContentEditable||"INPUT"===n||"SELECT"===n||"TEXTAREA"===n}(e)&&"/"===e.key&&!t)&&(e.preventDefault(),t?r():document.body.classList.contains("DocSearch--active")||document.body.classList.contains("DocSearch--active")||n()),i&&i.current===document.activeElement&&a&&/[a-zA-Z0-9]/.test(String.fromCharCode(e.keyCode))&&a(e)}return window.addEventListener("keydown",e),function(){window.removeEventListener("keydown",e)}}),[t,n,r,a,i])}({isOpen:I,onOpen:N,onClose:j,onInput:M,searchButtonRef:T});const $=(0,E.I)({id:"theme.SearchBar.label",message:"Search",description:"The ARIA label and placeholder for search button"});return 
o.createElement(o.Fragment,null,o.createElement(c.Z,null,o.createElement("link",{rel:"preconnect",href:`https://${d.appId}-dsn.algolia.net`,crossOrigin:"anonymous"})),o.createElement("div",{className:S.searchBox},o.createElement(k,{onTouchStart:R,onFocus:R,onMouseOver:R,onClick:N,ref:T,translations:{buttonText:$,buttonAriaLabel:$}})),I&&C&&D.current&&(0,a.createPortal)(o.createElement(C,(0,r.Z)({onClose:j,initialScrollY:window.scrollY,initialQuery:P,navigator:F,transformItems:B,hitComponent:x,resultsFooterComponent:U,transformSearchClient:z},d,{searchParameters:y})),D.current))}const D=function(){const{siteConfig:e}=(0,i.Z)();return o.createElement(_,e.themeConfig.algolia)}},9565:(e,t,n)=>{"use strict";n.d(t,{Z:()=>i});var r=n(6550),o=n(2263),a=n(7294);const i=function(){const e=(0,r.k6)(),{siteConfig:{baseUrl:t}}=(0,o.Z)(),[n,i]=(0,a.useState)("");return(0,a.useEffect)((()=>{var e;const t=null!==(e=new URLSearchParams(window.location.search).get("q"))&&void 0!==e?e:"";i(t)}),[]),{searchQuery:n,setSearchQuery:(0,a.useCallback)((t=>{const n=new URLSearchParams(window.location.search);t?n.set("q",t):n.delete("q"),e.replace({search:n.toString()}),i(t)}),[e]),generateSearchPageLink:(0,a.useCallback)((e=>`${t}search?q=${encodeURIComponent(e)}`),[t])}}},8802:(e,t)=>{"use strict";Object.defineProperty(t,"__esModule",{value:!0}),t.default=function(e,t){const{trailingSlash:n,baseUrl:r}=t;if(e.startsWith("#"))return e;if(void 0===n)return e;const[o]=e.split(/[#?]/),a="/"===o||o===r?o:(i=o,n?function(e){return e.endsWith("/")?e:`${e}/`}(i):function(e){return e.endsWith("/")?e.slice(0,-1):e}(i));var i;return e.replace(o,a)}},8780:function(e,t,n){"use strict";var r=this&&this.__importDefault||function(e){return e&&e.__esModule?e:{default:e}};Object.defineProperty(t,"__esModule",{value:!0}),t.applyTrailingSlash=void 0;var o=n(8802);Object.defineProperty(t,"applyTrailingSlash",{enumerable:!0,get:function(){return r(o).default}})},6010:(e,t,n)=>{"use strict";function r(e){var 
t,n,o="";if("string"==typeof e||"number"==typeof e)o+=e;else if("object"==typeof e)if(Array.isArray(e))for(t=0;to});const o=function(){for(var e,t,n=0,o="";n{"use strict";n.d(t,{lX:()=>S,q_:()=>T,ob:()=>h,PP:()=>O,Ep:()=>m,Hp:()=>g});var r=n(7462);function o(e){return"/"===e.charAt(0)}function a(e,t){for(var n=t,r=n+1,o=e.length;r=0;f--){var p=i[f];"."===p?a(i,f):".."===p?(a(i,f),d++):d&&(a(i,f),d--)}if(!u)for(;d--;d)i.unshift("..");!u||""===i[0]||i[0]&&o(i[0])||i.unshift("");var m=i.join("/");return n&&"/"!==m.substr(-1)&&(m+="/"),m};function l(e){return e.valueOf?e.valueOf():Object.prototype.valueOf.call(e)}const s=function e(t,n){if(t===n)return!0;if(null==t||null==n)return!1;if(Array.isArray(t))return Array.isArray(n)&&t.length===n.length&&t.every((function(t,r){return e(t,n[r])}));if("object"==typeof t||"object"==typeof n){var r=l(t),o=l(n);return r!==t||o!==n?e(r,o):Object.keys(Object.assign({},t,n)).every((function(r){return e(t[r],n[r])}))}return!1};var u=n(8776);function c(e){return"/"===e.charAt(0)?e:"/"+e}function d(e){return"/"===e.charAt(0)?e.substr(1):e}function f(e,t){return function(e,t){return 0===e.toLowerCase().indexOf(t.toLowerCase())&&-1!=="/?#".indexOf(e.charAt(t.length))}(e,t)?e.substr(t.length):e}function p(e){return"/"===e.charAt(e.length-1)?e.slice(0,-1):e}function m(e){var t=e.pathname,n=e.search,r=e.hash,o=t||"/";return n&&"?"!==n&&(o+="?"===n.charAt(0)?n:"?"+n),r&&"#"!==r&&(o+="#"===r.charAt(0)?r:"#"+r),o}function h(e,t,n,o){var a;"string"==typeof e?(a=function(e){var t=e||"/",n="",r="",o=t.indexOf("#");-1!==o&&(r=t.substr(o),t=t.substr(0,o));var a=t.indexOf("?");return-1!==a&&(n=t.substr(a),t=t.substr(0,a)),{pathname:t,search:"?"===n?"":n,hash:"#"===r?"":r}}(e),a.state=t):(void 0===(a=(0,r.Z)({},e)).pathname&&(a.pathname=""),a.search?"?"!==a.search.charAt(0)&&(a.search="?"+a.search):a.search="",a.hash?"#"!==a.hash.charAt(0)&&(a.hash="#"+a.hash):a.hash="",void 0!==t&&void 
0===a.state&&(a.state=t));try{a.pathname=decodeURI(a.pathname)}catch(l){throw l instanceof URIError?new URIError('Pathname "'+a.pathname+'" could not be decoded. This is likely caused by an invalid percent-encoding.'):l}return n&&(a.key=n),o?a.pathname?"/"!==a.pathname.charAt(0)&&(a.pathname=i(a.pathname,o.pathname)):a.pathname=o.pathname:a.pathname||(a.pathname="/"),a}function g(e,t){return e.pathname===t.pathname&&e.search===t.search&&e.hash===t.hash&&e.key===t.key&&s(e.state,t.state)}function v(){var e=null;var t=[];return{setPrompt:function(t){return e=t,function(){e===t&&(e=null)}},confirmTransitionTo:function(t,n,r,o){if(null!=e){var a="function"==typeof e?e(t,n):e;"string"==typeof a?"function"==typeof r?r(a,o):o(!0):o(!1!==a)}else o(!0)},appendListener:function(e){var n=!0;function r(){n&&e.apply(void 0,arguments)}return t.push(r),function(){n=!1,t=t.filter((function(e){return e!==r}))}},notifyListeners:function(){for(var e=arguments.length,n=new Array(e),r=0;rt?n.splice(t,n.length-t,o):n.push(o),d({action:r,location:o,index:t,entries:n})}}))},replace:function(e,t){var r="REPLACE",o=h(e,t,f(),w.location);c.confirmTransitionTo(o,r,n,(function(e){e&&(w.entries[w.index]=o,d({action:r,location:o}))}))},go:y,goBack:function(){y(-1)},goForward:function(){y(1)},canGo:function(e){var t=w.index+e;return t>=0&&t{"use strict";var r=n(9864),o={childContextTypes:!0,contextType:!0,contextTypes:!0,defaultProps:!0,displayName:!0,getDefaultProps:!0,getDerivedStateFromError:!0,getDerivedStateFromProps:!0,mixins:!0,propTypes:!0,type:!0},a={name:!0,length:!0,prototype:!0,caller:!0,callee:!0,arguments:!0,arity:!0},i={$$typeof:!0,compare:!0,defaultProps:!0,displayName:!0,propTypes:!0,type:!0},l={};function s(e){return r.isMemo(e)?i:l[e.$$typeof]||o}l[r.ForwardRef]={$$typeof:!0,render:!0,defaultProps:!0,displayName:!0,propTypes:!0},l[r.Memo]=i;var 
u=Object.defineProperty,c=Object.getOwnPropertyNames,d=Object.getOwnPropertySymbols,f=Object.getOwnPropertyDescriptor,p=Object.getPrototypeOf,m=Object.prototype;e.exports=function e(t,n,r){if("string"!=typeof n){if(m){var o=p(n);o&&o!==m&&e(t,o,r)}var i=c(n);d&&(i=i.concat(d(n)));for(var l=s(t),h=s(n),g=0;g{e.exports=Array.isArray||function(e){return"[object Array]"==Object.prototype.toString.call(e)}},6743:(e,t,n)=>{"use strict";n.r(t)},2497:(e,t,n)=>{"use strict";n.r(t)},2295:(e,t,n)=>{"use strict";n.r(t)},4865:function(e,t,n){var r,o;r=function(){var e,t,n={version:"0.2.0"},r=n.settings={minimum:.08,easing:"ease",positionUsing:"",speed:200,trickle:!0,trickleRate:.02,trickleSpeed:800,showSpinner:!0,barSelector:'[role="bar"]',spinnerSelector:'[role="spinner"]',parent:"body",template:'
'};function o(e,t,n){return en?n:e}function a(e){return 100*(-1+e)}function i(e,t,n){var o;return(o="translate3d"===r.positionUsing?{transform:"translate3d("+a(e)+"%,0,0)"}:"translate"===r.positionUsing?{transform:"translate("+a(e)+"%,0)"}:{"margin-left":a(e)+"%"}).transition="all "+t+"ms "+n,o}n.configure=function(e){var t,n;for(t in e)void 0!==(n=e[t])&&e.hasOwnProperty(t)&&(r[t]=n);return this},n.status=null,n.set=function(e){var t=n.isStarted();e=o(e,r.minimum,1),n.status=1===e?null:e;var a=n.render(!t),u=a.querySelector(r.barSelector),c=r.speed,d=r.easing;return a.offsetWidth,l((function(t){""===r.positionUsing&&(r.positionUsing=n.getPositioningCSS()),s(u,i(e,c,d)),1===e?(s(a,{transition:"none",opacity:1}),a.offsetWidth,setTimeout((function(){s(a,{transition:"all "+c+"ms linear",opacity:0}),setTimeout((function(){n.remove(),t()}),c)}),c)):setTimeout(t,c)})),this},n.isStarted=function(){return"number"==typeof n.status},n.start=function(){n.status||n.set(0);var e=function(){setTimeout((function(){n.status&&(n.trickle(),e())}),r.trickleSpeed)};return r.trickle&&e(),this},n.done=function(e){return e||n.status?n.inc(.3+.5*Math.random()).set(1):this},n.inc=function(e){var t=n.status;return t?("number"!=typeof e&&(e=(1-t)*o(Math.random()*t,.1,.95)),t=o(t+e,0,.994),n.set(t)):n.start()},n.trickle=function(){return n.inc(Math.random()*r.trickleRate)},e=0,t=0,n.promise=function(r){return r&&"resolved"!==r.state()?(0===t&&n.start(),e++,t++,r.always((function(){0==--t?(e=0,n.done()):n.set((e-t)/e)})),this):this},n.render=function(e){if(n.isRendered())return document.getElementById("nprogress");c(document.documentElement,"nprogress-busy");var t=document.createElement("div");t.id="nprogress",t.innerHTML=r.template;var o,i=t.querySelector(r.barSelector),l=e?"-100":a(n.status||0),u=document.querySelector(r.parent);return s(i,{transition:"all 0 
linear",transform:"translate3d("+l+"%,0,0)"}),r.showSpinner||(o=t.querySelector(r.spinnerSelector))&&p(o),u!=document.body&&c(u,"nprogress-custom-parent"),u.appendChild(t),t},n.remove=function(){d(document.documentElement,"nprogress-busy"),d(document.querySelector(r.parent),"nprogress-custom-parent");var e=document.getElementById("nprogress");e&&p(e)},n.isRendered=function(){return!!document.getElementById("nprogress")},n.getPositioningCSS=function(){var e=document.body.style,t="WebkitTransform"in e?"Webkit":"MozTransform"in e?"Moz":"msTransform"in e?"ms":"OTransform"in e?"O":"";return t+"Perspective"in e?"translate3d":t+"Transform"in e?"translate":"margin"};var l=function(){var e=[];function t(){var n=e.shift();n&&n(t)}return function(n){e.push(n),1==e.length&&t()}}(),s=function(){var e=["Webkit","O","Moz","ms"],t={};function n(e){return e.replace(/^-ms-/,"ms-").replace(/-([\da-z])/gi,(function(e,t){return t.toUpperCase()}))}function r(t){var n=document.body.style;if(t in n)return t;for(var r,o=e.length,a=t.charAt(0).toUpperCase()+t.slice(1);o--;)if((r=e[o]+a)in n)return r;return t}function o(e){return e=n(e),t[e]||(t[e]=r(e))}function a(e,t,n){t=o(t),e.style[t]=n}return function(e,t){var n,r,o=arguments;if(2==o.length)for(n in t)void 0!==(r=t[n])&&t.hasOwnProperty(n)&&a(e,n,r);else a(e,o[1],o[2])}}();function u(e,t){return("string"==typeof e?e:f(e)).indexOf(" "+t+" ")>=0}function c(e,t){var n=f(e),r=n+t;u(n,t)||(e.className=r.substring(1))}function d(e,t){var n,r=f(e);u(e,t)&&(n=r.replace(" "+t+" "," "),e.className=n.substring(1,n.length-1))}function f(e){return(" "+(e.className||"")+" ").replace(/\s+/gi," ")}function p(e){e&&e.parentNode&&e.parentNode.removeChild(e)}return n},void 0===(o="function"==typeof r?r.call(t,n,t,e):r)||(e.exports=o)},7418:e=>{"use strict";var t=Object.getOwnPropertySymbols,n=Object.prototype.hasOwnProperty,r=Object.prototype.propertyIsEnumerable;e.exports=function(){try{if(!Object.assign)return!1;var e=new 
String("abc");if(e[5]="de","5"===Object.getOwnPropertyNames(e)[0])return!1;for(var t={},n=0;n<10;n++)t["_"+String.fromCharCode(n)]=n;if("0123456789"!==Object.getOwnPropertyNames(t).map((function(e){return t[e]})).join(""))return!1;var r={};return"abcdefghijklmnopqrst".split("").forEach((function(e){r[e]=e})),"abcdefghijklmnopqrst"===Object.keys(Object.assign({},r)).join("")}catch(o){return!1}}()?Object.assign:function(e,o){for(var a,i,l=function(e){if(null==e)throw new TypeError("Object.assign cannot be called with null or undefined");return Object(e)}(e),s=1;s{function n(e){let t,n=[];for(let r of e.split(",").map((e=>e.trim())))if(/^-?\d+$/.test(r))n.push(parseInt(r,10));else if(t=r.match(/^(-?\d+)(-|\.\.\.?|\u2025|\u2026|\u22EF)(-?\d+)$/)){let[e,r,o,a]=t;if(r&&a){r=parseInt(r),a=parseInt(a);const e=r{"use strict";n.r(t),n.d(t,{default:()=>a});var r=function(){var e=/(?:^|\s)lang(?:uage)?-([\w-]+)(?=\s|$)/i,t=0,n={},r={util:{encode:function e(t){return t instanceof o?new o(t.type,e(t.content),t.alias):Array.isArray(t)?t.map(e):t.replace(/&/g,"&").replace(/=d.reach);S+=E.value.length,E=E.next){var C=E.value;if(t.length>e.length)return;if(!(C instanceof o)){var x,A=1;if(b){if(!(x=a(k,S,e,v))||x.index>=e.length)break;var _=x.index,D=x.index+x[0].length,T=S;for(T+=E.value.length;_>=T;)T+=(E=E.next).value.length;if(S=T-=E.value.length,E.value instanceof o)continue;for(var I=E;I!==t.tail&&(Td.reach&&(d.reach=R);var N=E.prev;if(P&&(N=s(t,N,P),S+=P.length),u(t,N,A),E=s(t,N,new o(f,g?r.tokenize(O,g):O,y,O)),L&&s(t,E,L),A>1){var j={cause:f+","+m,reach:R};i(e,t,n,E.prev,S,j),d&&j.reach>d.reach&&(d.reach=j.reach)}}}}}}function l(){var e={value:null,prev:null,next:null},t={value:null,prev:e,next:null};e.next=t,this.head=e,this.tail=t,this.length=0}function s(e,t,n){var r=t.next,o={value:n,prev:t,next:r};return t.next=o,r.prev=o,e.length++,o}function u(e,t,n){for(var 
r=t.next,o=0;o"+a.content+""},r}(),o=r;r.default=r,o.languages.markup={comment:{pattern://,greedy:!0},prolog:{pattern:/<\?[\s\S]+?\?>/,greedy:!0},doctype:{pattern:/"'[\]]|"[^"]*"|'[^']*')+(?:\[(?:[^<"'\]]|"[^"]*"|'[^']*'|<(?!!--)|)*\]\s*)?>/i,greedy:!0,inside:{"internal-subset":{pattern:/(^[^\[]*\[)[\s\S]+(?=\]>$)/,lookbehind:!0,greedy:!0,inside:null},string:{pattern:/"[^"]*"|'[^']*'/,greedy:!0},punctuation:/^$|[[\]]/,"doctype-tag":/^DOCTYPE/i,name:/[^\s<>'"]+/}},cdata:{pattern://i,greedy:!0},tag:{pattern:/<\/?(?!\d)[^\s>\/=$<%]+(?:\s(?:\s*[^\s>\/=]+(?:\s*=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+(?=[\s>]))|(?=[\s/>])))+)?\s*\/?>/,greedy:!0,inside:{tag:{pattern:/^<\/?[^\s>\/]+/,inside:{punctuation:/^<\/?/,namespace:/^[^\s>\/:]+:/}},"special-attr":[],"attr-value":{pattern:/=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+)/,inside:{punctuation:[{pattern:/^=/,alias:"attr-equals"},/"|'/]}},punctuation:/\/?>/,"attr-name":{pattern:/[^\s>\/]+/,inside:{namespace:/^[^\s>\/:]+:/}}}},entity:[{pattern:/&[\da-z]{1,8};/i,alias:"named-entity"},/&#x?[\da-f]{1,8};/i]},o.languages.markup.tag.inside["attr-value"].inside.entity=o.languages.markup.entity,o.languages.markup.doctype.inside["internal-subset"].inside=o.languages.markup,o.hooks.add("wrap",(function(e){"entity"===e.type&&(e.attributes.title=e.content.replace(/&/,"&"))})),Object.defineProperty(o.languages.markup.tag,"addInlined",{value:function(e,t){var n={};n["language-"+t]={pattern:/(^$)/i,lookbehind:!0,inside:o.languages[t]},n.cdata=/^$/i;var r={"included-cdata":{pattern://i,inside:n}};r["language-"+t]={pattern:/[\s\S]+/,inside:o.languages[t]};var a={};a[e]={pattern:RegExp(/(<__[^>]*>)(?:))*\]\]>|(?!)/.source.replace(/__/g,(function(){return 
e})),"i"),lookbehind:!0,greedy:!0,inside:r},o.languages.insertBefore("markup","cdata",a)}}),Object.defineProperty(o.languages.markup.tag,"addAttribute",{value:function(e,t){o.languages.markup.tag.inside["special-attr"].push({pattern:RegExp(/(^|["'\s])/.source+"(?:"+e+")"+/\s*=\s*(?:"[^"]*"|'[^']*'|[^\s'">=]+(?=[\s>]))/.source,"i"),lookbehind:!0,inside:{"attr-name":/^[^\s=]+/,"attr-value":{pattern:/=[\s\S]+/,inside:{value:{pattern:/(^=\s*(["']|(?!["'])))\S[\s\S]*(?=\2$)/,lookbehind:!0,alias:[t,"language-"+t],inside:o.languages[t]},punctuation:[{pattern:/^=/,alias:"attr-equals"},/"|'/]}}}})}}),o.languages.html=o.languages.markup,o.languages.mathml=o.languages.markup,o.languages.svg=o.languages.markup,o.languages.xml=o.languages.extend("markup",{}),o.languages.ssml=o.languages.xml,o.languages.atom=o.languages.xml,o.languages.rss=o.languages.xml,function(e){var t="\\b(?:BASH|BASHOPTS|BASH_ALIASES|BASH_ARGC|BASH_ARGV|BASH_CMDS|BASH_COMPLETION_COMPAT_DIR|BASH_LINENO|BASH_REMATCH|BASH_SOURCE|BASH_VERSINFO|BASH_VERSION|COLORTERM|COLUMNS|COMP_WORDBREAKS|DBUS_SESSION_BUS_ADDRESS|DEFAULTS_PATH|DESKTOP_SESSION|DIRSTACK|DISPLAY|EUID|GDMSESSION|GDM_LANG|GNOME_KEYRING_CONTROL|GNOME_KEYRING_PID|GPG_AGENT_INFO|GROUPS|HISTCONTROL|HISTFILE|HISTFILESIZE|HISTSIZE|HOME|HOSTNAME|HOSTTYPE|IFS|INSTANCE|JOB|LANG|LANGUAGE|LC_ADDRESS|LC_ALL|LC_IDENTIFICATION|LC_MEASUREMENT|LC_MONETARY|LC_NAME|LC_NUMERIC|LC_PAPER|LC_TELEPHONE|LC_TIME|LESSCLOSE|LESSOPEN|LINES|LOGNAME|LS_COLORS|MACHTYPE|MAILCHECK|MANDATORY_PATH|NO_AT_BRIDGE|OLDPWD|OPTERR|OPTIND|ORBIT_SOCKETDIR|OSTYPE|PAPERSIZE|PATH|PIPESTATUS|PPID|PS1|PS2|PS3|PS4|PWD|RANDOM|REPLY|SECONDS|SELINUX_INIT|SESSION|SESSIONTYPE|SESSION_MANAGER|SHELL|SHELLOPTS|SHLVL|SSH_AUTH_SOCK|TERM|UID|UPSTART_EVENTS|UPSTART_INSTANCE|UPSTART_JOB|UPSTART_SESSION|USER|WINDOWID|XAUTHORITY|XDG_CONFIG_DIRS|XDG_CURRENT_DESKTOP|XDG_DATA_DIRS|XDG_GREETER_DATA_DIR|XDG_MENU_PREFIX|XDG_RUNTIME_DIR|XDG_SEAT|XDG_SEAT_PATH|XDG_SESSION_DESKTOP|XDG_SESSION_ID|XDG_SESSION_PATH|XDG_SESS
ION_TYPE|XDG_VTNR|XMODIFIERS)\\b",n={pattern:/(^(["']?)\w+\2)[ \t]+\S.*/,lookbehind:!0,alias:"punctuation",inside:null},r={bash:n,environment:{pattern:RegExp("\\$"+t),alias:"constant"},variable:[{pattern:/\$?\(\([\s\S]+?\)\)/,greedy:!0,inside:{variable:[{pattern:/(^\$\(\([\s\S]+)\)\)/,lookbehind:!0},/^\$\(\(/],number:/\b0x[\dA-Fa-f]+\b|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:[Ee]-?\d+)?/,operator:/--|\+\+|\*\*=?|<<=?|>>=?|&&|\|\||[=!+\-*/%<>^&|]=?|[?~:]/,punctuation:/\(\(?|\)\)?|,|;/}},{pattern:/\$\((?:\([^)]+\)|[^()])+\)|`[^`]+`/,greedy:!0,inside:{variable:/^\$\(|^`|\)$|`$/}},{pattern:/\$\{[^}]+\}/,greedy:!0,inside:{operator:/:[-=?+]?|[!\/]|##?|%%?|\^\^?|,,?/,punctuation:/[\[\]]/,environment:{pattern:RegExp("(\\{)"+t),lookbehind:!0,alias:"constant"}}},/\$(?:\w+|[#?*!@$])/],entity:/\\(?:[abceEfnrtv\\"]|O?[0-7]{1,3}|U[0-9a-fA-F]{8}|u[0-9a-fA-F]{4}|x[0-9a-fA-F]{1,2})/};e.languages.bash={shebang:{pattern:/^#!\s*\/.*/,alias:"important"},comment:{pattern:/(^|[^"{\\$])#.*/,lookbehind:!0},"function-name":[{pattern:/(\bfunction\s+)[\w-]+(?=(?:\s*\(?:\s*\))?\s*\{)/,lookbehind:!0,alias:"function"},{pattern:/\b[\w-]+(?=\s*\(\s*\)\s*\{)/,alias:"function"}],"for-or-select":{pattern:/(\b(?:for|select)\s+)\w+(?=\s+in\s)/,alias:"variable",lookbehind:!0},"assign-left":{pattern:/(^|[\s;|&]|[<>]\()\w+(?=\+?=)/,inside:{environment:{pattern:RegExp("(^|[\\s;|&]|[<>]\\()"+t),lookbehind:!0,alias:"constant"}},alias:"variable",lookbehind:!0},string:[{pattern:/((?:^|[^<])<<-?\s*)(\w+)\s[\s\S]*?(?:\r?\n|\r)\2/,lookbehind:!0,greedy:!0,inside:r},{pattern:/((?:^|[^<])<<-?\s*)(["'])(\w+)\2\s[\s\S]*?(?:\r?\n|\r)\3/,lookbehind:!0,greedy:!0,inside:{bash:n}},{pattern:/(^|[^\\](?:\\\\)*)"(?:\\[\s\S]|\$\([^)]+\)|\$(?!\()|`[^`]+`|[^"\\`$])*"/,lookbehind:!0,greedy:!0,inside:r},{pattern:/(^|[^$\\])'[^']*'/,lookbehind:!0,greedy:!0},{pattern:/\$'(?:[^'\\]|\\[\s\S])*'/,greedy:!0,inside:{entity:r.entity}}],environment:{pattern:RegExp("\\$?"+t),alias:"constant"},variable:r.variable,function:{pattern:/(^|[\s;|&]|[<>]\()(?
:add|apropos|apt|apt-cache|apt-get|aptitude|aspell|automysqlbackup|awk|basename|bash|bc|bconsole|bg|bzip2|cal|cat|cfdisk|chgrp|chkconfig|chmod|chown|chroot|cksum|clear|cmp|column|comm|composer|cp|cron|crontab|csplit|curl|cut|date|dc|dd|ddrescue|debootstrap|df|diff|diff3|dig|dir|dircolors|dirname|dirs|dmesg|docker|docker-compose|du|egrep|eject|env|ethtool|expand|expect|expr|fdformat|fdisk|fg|fgrep|file|find|fmt|fold|format|free|fsck|ftp|fuser|gawk|git|gparted|grep|groupadd|groupdel|groupmod|groups|grub-mkconfig|gzip|halt|head|hg|history|host|hostname|htop|iconv|id|ifconfig|ifdown|ifup|import|install|ip|jobs|join|kill|killall|less|link|ln|locate|logname|logrotate|look|lpc|lpr|lprint|lprintd|lprintq|lprm|ls|lsof|lynx|make|man|mc|mdadm|mkconfig|mkdir|mke2fs|mkfifo|mkfs|mkisofs|mknod|mkswap|mmv|more|most|mount|mtools|mtr|mutt|mv|nano|nc|netstat|nice|nl|node|nohup|notify-send|npm|nslookup|op|open|parted|passwd|paste|pathchk|ping|pkill|pnpm|podman|podman-compose|popd|pr|printcap|printenv|ps|pushd|pv|quota|quotacheck|quotactl|ram|rar|rcp|reboot|remsync|rename|renice|rev|rm|rmdir|rpm|rsync|scp|screen|sdiff|sed|sendmail|seq|service|sftp|sh|shellcheck|shuf|shutdown|sleep|slocate|sort|split|ssh|stat|strace|su|sudo|sum|suspend|swapon|sync|tac|tail|tar|tee|time|timeout|top|touch|tr|traceroute|tsort|tty|umount|uname|unexpand|uniq|units|unrar|unshar|unzip|update-grub|uptime|useradd|userdel|usermod|users|uudecode|uuencode|v|vcpkg|vdir|vi|vim|virsh|vmstat|wait|watch|wc|wget|whereis|which|who|whoami|write|xargs|xdg-open|yarn|yes|zenity|zip|zsh|zypper)(?=$|[)\s;|&])/,lookbehind:!0},keyword:{pattern:/(^|[\s;|&]|[<>]\()(?:case|do|done|elif|else|esac|fi|for|function|if|in|select|then|until|while)(?=$|[)\s;|&])/,lookbehind:!0},builtin:{pattern:/(^|[\s;|&]|[<>]\()(?:\.|:|alias|bind|break|builtin|caller|cd|command|continue|declare|echo|enable|eval|exec|exit|export|getopts|hash|help|let|local|logout|mapfile|printf|pwd|read|readarray|readonly|return|set|shift|shopt|source|test|times|trap|type|
typeset|ulimit|umask|unalias|unset)(?=$|[)\s;|&])/,lookbehind:!0,alias:"class-name"},boolean:{pattern:/(^|[\s;|&]|[<>]\()(?:false|true)(?=$|[)\s;|&])/,lookbehind:!0},"file-descriptor":{pattern:/\B&\d\b/,alias:"important"},operator:{pattern:/\d?<>|>\||\+=|=[=~]?|!=?|<<[<-]?|[&\d]?>>|\d[<>]&?|[<>][&=]?|&[>&]?|\|[&|]?/,inside:{"file-descriptor":{pattern:/^\d/,alias:"important"}}},punctuation:/\$?\(\(?|\)\)?|\.\.|[{}[\];\\]/,number:{pattern:/(^|\s)(?:[1-9]\d*|0)(?:[.,]\d+)?\b/,lookbehind:!0}},n.inside=e.languages.bash;for(var o=["comment","function-name","for-or-select","assign-left","string","environment","function","keyword","builtin","boolean","file-descriptor","operator","punctuation","number"],a=r.variable[1].inside,i=0;i]=?|[!=]=?=?|--?|\+\+?|&&?|\|\|?|[?*/~^%]/,punctuation:/[{}[\];(),.:]/},o.languages.c=o.languages.extend("clike",{comment:{pattern:/\/\/(?:[^\r\n\\]|\\(?:\r\n?|\n|(?![\r\n])))*|\/\*[\s\S]*?(?:\*\/|$)/,greedy:!0},string:{pattern:/"(?:\\(?:\r\n|[\s\S])|[^"\\\r\n])*"/,greedy:!0},"class-name":{pattern:/(\b(?:enum|struct)\s+(?:__attribute__\s*\(\([\s\S]*?\)\)\s*)?)\w+|\b[a-z]\w*_t\b/,lookbehind:!0},keyword:/\b(?:_Alignas|_Alignof|_Atomic|_Bool|_Complex|_Generic|_Imaginary|_Noreturn|_Static_assert|_Thread_local|__attribute__|asm|auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|inline|int|long|register|return|short|signed|sizeof|static|struct|switch|typedef|typeof|union|unsigned|void|volatile|while)\b/,function:/\b[a-z_]\w*(?=\s*\()/i,number:/(?:\b0x(?:[\da-f]+(?:\.[\da-f]*)?|\.[\da-f]+)(?:p[+-]?\d+)?|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:e[+-]?\d+)?)[ful]{0,4}/i,operator:/>>=?|<<=?|->|([-+&|:])\1|[?:~]|[-+*/%&|^!=<>]=?/}),o.languages.insertBefore("c","string",{char:{pattern:/'(?:\\(?:\r\n|[\s\S])|[^'\\\r\n]){0,32}'/,greedy:!0}}),o.languages.insertBefore("c","string",{macro:{pattern:/(^[\t 
]*)#\s*[a-z](?:[^\r\n\\/]|\/(?!\*)|\/\*(?:[^*]|\*(?!\/))*\*\/|\\(?:\r\n|[\s\S]))*/im,lookbehind:!0,greedy:!0,alias:"property",inside:{string:[{pattern:/^(#\s*include\s*)<[^>]+>/,lookbehind:!0},o.languages.c.string],char:o.languages.c.char,comment:o.languages.c.comment,"macro-name":[{pattern:/(^#\s*define\s+)\w+\b(?!\()/i,lookbehind:!0},{pattern:/(^#\s*define\s+)\w+\b(?=\()/i,lookbehind:!0,alias:"function"}],directive:{pattern:/^(#\s*)[a-z]+/,lookbehind:!0,alias:"keyword"},"directive-hash":/^#/,punctuation:/##|\\(?=[\r\n])/,expression:{pattern:/\S[\s\S]*/,inside:o.languages.c}}}}),o.languages.insertBefore("c","function",{constant:/\b(?:EOF|NULL|SEEK_CUR|SEEK_END|SEEK_SET|__DATE__|__FILE__|__LINE__|__TIMESTAMP__|__TIME__|__func__|stderr|stdin|stdout)\b/}),delete o.languages.c.boolean,function(e){var t=/\b(?:alignas|alignof|asm|auto|bool|break|case|catch|char|char16_t|char32_t|char8_t|class|co_await|co_return|co_yield|compl|concept|const|const_cast|consteval|constexpr|constinit|continue|decltype|default|delete|do|double|dynamic_cast|else|enum|explicit|export|extern|final|float|for|friend|goto|if|import|inline|int|int16_t|int32_t|int64_t|int8_t|long|module|mutable|namespace|new|noexcept|nullptr|operator|override|private|protected|public|register|reinterpret_cast|requires|return|short|signed|sizeof|static|static_assert|static_cast|struct|switch|template|this|thread_local|throw|try|typedef|typeid|typename|uint16_t|uint32_t|uint64_t|uint8_t|union|unsigned|using|virtual|void|volatile|wchar_t|while)\b/,n=/\b(?!)\w+(?:\s*\.\s*\w+)*\b/.source.replace(//g,(function(){return t.source}));e.languages.cpp=e.languages.extend("c",{"class-name":[{pattern:RegExp(/(\b(?:class|concept|enum|struct|typename)\s+)(?!)\w+/.source.replace(//g,(function(){return 
t.source}))),lookbehind:!0},/\b[A-Z]\w*(?=\s*::\s*\w+\s*\()/,/\b[A-Z_]\w*(?=\s*::\s*~\w+\s*\()/i,/\b\w+(?=\s*<(?:[^<>]|<(?:[^<>]|<[^<>]*>)*>)*>\s*::\s*\w+\s*\()/],keyword:t,number:{pattern:/(?:\b0b[01']+|\b0x(?:[\da-f']+(?:\.[\da-f']*)?|\.[\da-f']+)(?:p[+-]?[\d']+)?|(?:\b[\d']+(?:\.[\d']*)?|\B\.[\d']+)(?:e[+-]?[\d']+)?)[ful]{0,4}/i,greedy:!0},operator:/>>=?|<<=?|->|--|\+\+|&&|\|\||[?:~]|<=>|[-+*/%&|^!=<>]=?|\b(?:and|and_eq|bitand|bitor|not|not_eq|or|or_eq|xor|xor_eq)\b/,boolean:/\b(?:false|true)\b/}),e.languages.insertBefore("cpp","string",{module:{pattern:RegExp(/(\b(?:import|module)\s+)/.source+"(?:"+/"(?:\\(?:\r\n|[\s\S])|[^"\\\r\n])*"|<[^<>\r\n]*>/.source+"|"+/(?:\s*:\s*)?|:\s*/.source.replace(//g,(function(){return n}))+")"),lookbehind:!0,greedy:!0,inside:{string:/^[<"][\s\S]+/,operator:/:/,punctuation:/\./}},"raw-string":{pattern:/R"([^()\\ ]{0,16})\([\s\S]*?\)\1"/,alias:"string",greedy:!0}}),e.languages.insertBefore("cpp","keyword",{"generic-function":{pattern:/\b(?!operator\b)[a-z_]\w*\s*<(?:[^<>]|<[^<>]*>)*>(?=\s*\()/i,inside:{function:/^\w+/,generic:{pattern:/<[\s\S]+/,alias:"class-name",inside:e.languages.cpp}}}}),e.languages.insertBefore("cpp","operator",{"double-colon":{pattern:/::/,alias:"punctuation"}}),e.languages.insertBefore("cpp","class-name",{"base-clause":{pattern:/(\b(?:class|struct)\s+\w+\s*:\s*)[^;{}"'\s]+(?:\s+[^;{}"'\s]+)*(?=\s*[;{])/,lookbehind:!0,greedy:!0,inside:e.languages.extend("cpp",{})}}),e.languages.insertBefore("inside","double-colon",{"class-name":/\b[a-z_]\w*\b(?!\s*::)/i},e.languages.cpp["base-clause"])}(o),function(e){var 
t=/(?:"(?:\\(?:\r\n|[\s\S])|[^"\\\r\n])*"|'(?:\\(?:\r\n|[\s\S])|[^'\\\r\n])*')/;e.languages.css={comment:/\/\*[\s\S]*?\*\//,atrule:{pattern:/@[\w-](?:[^;{\s]|\s+(?![\s{]))*(?:;|(?=\s*\{))/,inside:{rule:/^@[\w-]+/,"selector-function-argument":{pattern:/(\bselector\s*\(\s*(?![\s)]))(?:[^()\s]|\s+(?![\s)])|\((?:[^()]|\([^()]*\))*\))+(?=\s*\))/,lookbehind:!0,alias:"selector"},keyword:{pattern:/(^|[^\w-])(?:and|not|only|or)(?![\w-])/,lookbehind:!0}}},url:{pattern:RegExp("\\burl\\((?:"+t.source+"|"+/(?:[^\\\r\n()"']|\\[\s\S])*/.source+")\\)","i"),greedy:!0,inside:{function:/^url/i,punctuation:/^\(|\)$/,string:{pattern:RegExp("^"+t.source+"$"),alias:"url"}}},selector:{pattern:RegExp("(^|[{}\\s])[^{}\\s](?:[^{};\"'\\s]|\\s+(?![\\s{])|"+t.source+")*(?=\\s*\\{)"),lookbehind:!0},string:{pattern:t,greedy:!0},property:{pattern:/(^|[^-\w\xA0-\uFFFF])(?!\s)[-_a-z\xA0-\uFFFF](?:(?!\s)[-\w\xA0-\uFFFF])*(?=\s*:)/i,lookbehind:!0},important:/!important\b/i,function:{pattern:/(^|[^-a-z0-9])[-a-z0-9]+(?=\()/i,lookbehind:!0},punctuation:/[(){};:,]/},e.languages.css.atrule.inside.rest=e.languages.css;var n=e.languages.markup;n&&(n.tag.addInlined("style","css"),n.tag.addAttribute("style","css"))}(o),function(e){var 
t,n=/("|')(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/;e.languages.css.selector={pattern:e.languages.css.selector.pattern,lookbehind:!0,inside:t={"pseudo-element":/:(?:after|before|first-letter|first-line|selection)|::[-\w]+/,"pseudo-class":/:[-\w]+/,class:/\.[-\w]+/,id:/#[-\w]+/,attribute:{pattern:RegExp("\\[(?:[^[\\]\"']|"+n.source+")*\\]"),greedy:!0,inside:{punctuation:/^\[|\]$/,"case-sensitivity":{pattern:/(\s)[si]$/i,lookbehind:!0,alias:"keyword"},namespace:{pattern:/^(\s*)(?:(?!\s)[-*\w\xA0-\uFFFF])*\|(?!=)/,lookbehind:!0,inside:{punctuation:/\|$/}},"attr-name":{pattern:/^(\s*)(?:(?!\s)[-\w\xA0-\uFFFF])+/,lookbehind:!0},"attr-value":[n,{pattern:/(=\s*)(?:(?!\s)[-\w\xA0-\uFFFF])+(?=\s*$)/,lookbehind:!0}],operator:/[|~*^$]?=/}},"n-th":[{pattern:/(\(\s*)[+-]?\d*[\dn](?:\s*[+-]\s*\d+)?(?=\s*\))/,lookbehind:!0,inside:{number:/[\dn]+/,operator:/[+-]/}},{pattern:/(\(\s*)(?:even|odd)(?=\s*\))/i,lookbehind:!0}],combinator:/>|\+|~|\|\|/,punctuation:/[(),]/}},e.languages.css.atrule.inside["selector-function-argument"].inside=t,e.languages.insertBefore("css","property",{variable:{pattern:/(^|[^-\w\xA0-\uFFFF])--(?!\s)[-_a-z\xA0-\uFFFF](?:(?!\s)[-\w\xA0-\uFFFF])*/i,lookbehind:!0}});var 
r={pattern:/(\b\d+)(?:%|[a-z]+(?![\w-]))/,lookbehind:!0},o={pattern:/(^|[^\w.-])-?(?:\d+(?:\.\d+)?|\.\d+)/,lookbehind:!0};e.languages.insertBefore("css","function",{operator:{pattern:/(\s)[+\-*\/](?=\s)/,lookbehind:!0},hexcode:{pattern:/\B#[\da-f]{3,8}\b/i,alias:"color"},color:[{pattern:/(^|[^\w-])(?:AliceBlue|AntiqueWhite|Aqua|Aquamarine|Azure|Beige|Bisque|Black|BlanchedAlmond|Blue|BlueViolet|Brown|BurlyWood|CadetBlue|Chartreuse|Chocolate|Coral|CornflowerBlue|Cornsilk|Crimson|Cyan|DarkBlue|DarkCyan|DarkGoldenRod|DarkGr[ae]y|DarkGreen|DarkKhaki|DarkMagenta|DarkOliveGreen|DarkOrange|DarkOrchid|DarkRed|DarkSalmon|DarkSeaGreen|DarkSlateBlue|DarkSlateGr[ae]y|DarkTurquoise|DarkViolet|DeepPink|DeepSkyBlue|DimGr[ae]y|DodgerBlue|FireBrick|FloralWhite|ForestGreen|Fuchsia|Gainsboro|GhostWhite|Gold|GoldenRod|Gr[ae]y|Green|GreenYellow|HoneyDew|HotPink|IndianRed|Indigo|Ivory|Khaki|Lavender|LavenderBlush|LawnGreen|LemonChiffon|LightBlue|LightCoral|LightCyan|LightGoldenRodYellow|LightGr[ae]y|LightGreen|LightPink|LightSalmon|LightSeaGreen|LightSkyBlue|LightSlateGr[ae]y|LightSteelBlue|LightYellow|Lime|LimeGreen|Linen|Magenta|Maroon|MediumAquaMarine|MediumBlue|MediumOrchid|MediumPurple|MediumSeaGreen|MediumSlateBlue|MediumSpringGreen|MediumTurquoise|MediumVioletRed|MidnightBlue|MintCream|MistyRose|Moccasin|NavajoWhite|Navy|OldLace|Olive|OliveDrab|Orange|OrangeRed|Orchid|PaleGoldenRod|PaleGreen|PaleTurquoise|PaleVioletRed|PapayaWhip|PeachPuff|Peru|Pink|Plum|PowderBlue|Purple|Red|RosyBrown|RoyalBlue|SaddleBrown|Salmon|SandyBrown|SeaGreen|SeaShell|Sienna|Silver|SkyBlue|SlateBlue|SlateGr[ae]y|Snow|SpringGreen|SteelBlue|Tan|Teal|Thistle|Tomato|Transparent|Turquoise|Violet|Wheat|White|WhiteSmoke|Yellow|YellowGreen)(?![\w-])/i,lookbehind:!0},{pattern:/\b(?:hsl|rgb)\(\s*\d{1,3}\s*,\s*\d{1,3}%?\s*,\s*\d{1,3}%?\s*\)\B|\b(?:hsl|rgb)a\(\s*\d{1,3}\s*,\s*\d{1,3}%?\s*,\s*\d{1,3}%?\s*,\s*(?:0|0?\.\d+|1)\s*\)\B/i,inside:{unit:r,number:o,function:/[\w-]+(?=\()/,punctuation:/[(),]/}}],entity:/\\[\da-f]
{1,8}/i,unit:r,number:o})}(o),o.languages.javascript=o.languages.extend("clike",{"class-name":[o.languages.clike["class-name"],{pattern:/(^|[^$\w\xA0-\uFFFF])(?!\s)[_$A-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\.(?:constructor|prototype))/,lookbehind:!0}],keyword:[{pattern:/((?:^|\})\s*)catch\b/,lookbehind:!0},{pattern:/(^|[^.]|\.\.\.\s*)\b(?:as|assert(?=\s*\{)|async(?=\s*(?:function\b|\(|[$\w\xA0-\uFFFF]|$))|await|break|case|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally(?=\s*(?:\{|$))|for|from(?=\s*(?:['"]|$))|function|(?:get|set)(?=\s*(?:[#\[$\w\xA0-\uFFFF]|$))|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|static|super|switch|this|throw|try|typeof|undefined|var|void|while|with|yield)\b/,lookbehind:!0}],function:/#?(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*(?:\.\s*(?:apply|bind|call)\s*)?\()/,number:{pattern:RegExp(/(^|[^\w$])/.source+"(?:"+/NaN|Infinity/.source+"|"+/0[bB][01]+(?:_[01]+)*n?/.source+"|"+/0[oO][0-7]+(?:_[0-7]+)*n?/.source+"|"+/0[xX][\dA-Fa-f]+(?:_[\dA-Fa-f]+)*n?/.source+"|"+/\d+(?:_\d+)*n/.source+"|"+/(?:\d+(?:_\d+)*(?:\.(?:\d+(?:_\d+)*)?)?|\.\d+(?:_\d+)*)(?:[Ee][+-]?\d+(?:_\d+)*)?/.source+")"+/(?![\w$])/.source),lookbehind:!0},operator:/--|\+\+|\*\*=?|=>|&&=?|\|\|=?|[!=]==|<<=?|>>>?=?|[-+*/%&|^!=<>]=?|\.{3}|\?\?=?|\?\.?|[~:]/}),o.languages.javascript["class-name"][0].pattern=/(\b(?:class|extends|implements|instanceof|interface|new)\s+)[\w.\\]+/,o.languages.insertBefore("javascript","keyword",{regex:{pattern:/((?:^|[^$\w\xA0-\uFFFF."'\])\s]|\b(?:return|yield))\s*)\/(?:\[(?:[^\]\\\r\n]|\\.)*\]|\\.|[^/\\\[\r\n])+\/[dgimyus]{0,7}(?=(?:\s|\/\*(?:[^*]|\*(?!\/))*\*\/)*(?:$|[\r\n,.;:})\]]|\/\/))/,lookbehind:!0,greedy:!0,inside:{"regex-source":{pattern:/^(\/)[\s\S]+(?=\/[a-z]*$)/,lookbehind:!0,alias:"language-regex",inside:o.languages.regex},"regex-delimiter":/^\/|\/$/,"regex-flags":/^[a-z]+$/}},"function-variable":{pattern:/#?(?!\s)[_$a-zA-Z\xA0-\uF
FFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*[=:]\s*(?:async\s*)?(?:\bfunction\b|(?:\((?:[^()]|\([^()]*\))*\)|(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*)\s*=>))/,alias:"function"},parameter:[{pattern:/(function(?:\s+(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*)?\s*\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\))/,lookbehind:!0,inside:o.languages.javascript},{pattern:/(^|[^$\w\xA0-\uFFFF])(?!\s)[_$a-z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*=>)/i,lookbehind:!0,inside:o.languages.javascript},{pattern:/(\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\)\s*=>)/,lookbehind:!0,inside:o.languages.javascript},{pattern:/((?:\b|\s|^)(?!(?:as|async|await|break|case|catch|class|const|continue|debugger|default|delete|do|else|enum|export|extends|finally|for|from|function|get|if|implements|import|in|instanceof|interface|let|new|null|of|package|private|protected|public|return|set|static|super|switch|this|throw|try|typeof|undefined|var|void|while|with|yield)(?![$\w\xA0-\uFFFF]))(?:(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*\s*)\(\s*|\]\s*\(\s*)(?!\s)(?:[^()\s]|\s+(?![\s)])|\([^()]*\))+(?=\s*\)\s*\{)/,lookbehind:!0,inside:o.languages.javascript}],constant:/\b[A-Z](?:[A-Z_]|\dx?)*\b/}),o.languages.insertBefore("javascript","string",{hashbang:{pattern:/^#!.*/,greedy:!0,alias:"comment"},"template-string":{pattern:/`(?:\\[\s\S]|\$\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}|(?!\$\{)[^\\`])*`/,greedy:!0,inside:{"template-punctuation":{pattern:/^`|`$/,alias:"string"},interpolation:{pattern:/((?:^|[^\\])(?:\\{2})*)\$\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}/,lookbehind:!0,inside:{"interpolation-punctuation":{pattern:/^\$\{|\}$/,alias:"punctuation"},rest:o.languages.javascript}},string:/[\s\S]+/}},"string-property":{pattern:/((?:^|[,{])[ \t]*)(["'])(?:\\(?:\r\n|[\s\S])|(?!\2)[^\\\r\n])*\2(?=\s*:)/m,lookbehind:!0,greedy:!0,alias:"property"}}),o.languages.insertBefore("javascript","operator",{"literal-property":{pattern:/((?:^|[,{])[ 
\t]*)(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*(?=\s*:)/m,lookbehind:!0,alias:"property"}}),o.languages.markup&&(o.languages.markup.tag.addInlined("script","javascript"),o.languages.markup.tag.addAttribute(/on(?:abort|blur|change|click|composition(?:end|start|update)|dblclick|error|focus(?:in|out)?|key(?:down|up)|load|mouse(?:down|enter|leave|move|out|over|up)|reset|resize|scroll|select|slotchange|submit|unload|wheel)/.source,"javascript")),o.languages.js=o.languages.javascript,function(e){var t=/#(?!\{).+/,n={pattern:/#\{[^}]+\}/,alias:"variable"};e.languages.coffeescript=e.languages.extend("javascript",{comment:t,string:[{pattern:/'(?:\\[\s\S]|[^\\'])*'/,greedy:!0},{pattern:/"(?:\\[\s\S]|[^\\"])*"/,greedy:!0,inside:{interpolation:n}}],keyword:/\b(?:and|break|by|catch|class|continue|debugger|delete|do|each|else|extend|extends|false|finally|for|if|in|instanceof|is|isnt|let|loop|namespace|new|no|not|null|of|off|on|or|own|return|super|switch|then|this|throw|true|try|typeof|undefined|unless|until|when|while|window|with|yes|yield)\b/,"class-member":{pattern:/@(?!\d)\w+/,alias:"variable"}}),e.languages.insertBefore("coffeescript","comment",{"multiline-comment":{pattern:/###[\s\S]+?###/,alias:"comment"},"block-regex":{pattern:/\/{3}[\s\S]*?\/{3}/,alias:"regex",inside:{comment:t,interpolation:n}}}),e.languages.insertBefore("coffeescript","string",{"inline-javascript":{pattern:/`(?:\\[\s\S]|[^\\`])*`/,inside:{delimiter:{pattern:/^`|`$/,alias:"punctuation"},script:{pattern:/[\s\S]+/,alias:"language-javascript",inside:e.languages.javascript}}},"multiline-string":[{pattern:/'''[\s\S]*?'''/,greedy:!0,alias:"string"},{pattern:/"""[\s\S]*?"""/,greedy:!0,alias:"string",inside:{interpolation:n}}]}),e.languages.insertBefore("coffeescript","keyword",{property:/(?!\d)\w+(?=\s*:(?!:))/}),delete e.languages.coffeescript["template-string"],e.languages.coffee=e.languages.coffeescript}(o),function(e){var 
t=/[*&][^\s[\]{},]+/,n=/!(?:<[\w\-%#;/?:@&=+$,.!~*'()[\]]+>|(?:[a-zA-Z\d-]*!)?[\w\-%#;/?:@&=+$.~*'()]+)?/,r="(?:"+n.source+"(?:[ \t]+"+t.source+")?|"+t.source+"(?:[ \t]+"+n.source+")?)",o=/(?:[^\s\x00-\x08\x0e-\x1f!"#%&'*,\-:>?@[\]`{|}\x7f-\x84\x86-\x9f\ud800-\udfff\ufffe\uffff]|[?:-])(?:[ \t]*(?:(?![#:])|:))*/.source.replace(//g,(function(){return/[^\s\x00-\x08\x0e-\x1f,[\]{}\x7f-\x84\x86-\x9f\ud800-\udfff\ufffe\uffff]/.source})),a=/"(?:[^"\\\r\n]|\\.)*"|'(?:[^'\\\r\n]|\\.)*'/.source;function i(e,t){t=(t||"").replace(/m/g,"")+"m";var n=/([:\-,[{]\s*(?:\s<>[ \t]+)?)(?:<>)(?=[ \t]*(?:$|,|\]|\}|(?:[\r\n]\s*)?#))/.source.replace(/<>/g,(function(){return r})).replace(/<>/g,(function(){return e}));return RegExp(n,t)}e.languages.yaml={scalar:{pattern:RegExp(/([\-:]\s*(?:\s<>[ \t]+)?[|>])[ \t]*(?:((?:\r?\n|\r)[ \t]+)\S[^\r\n]*(?:\2[^\r\n]+)*)/.source.replace(/<>/g,(function(){return r}))),lookbehind:!0,alias:"string"},comment:/#.*/,key:{pattern:RegExp(/((?:^|[:\-,[{\r\n?])[ \t]*(?:<>[ \t]+)?)<>(?=\s*:\s)/.source.replace(/<>/g,(function(){return r})).replace(/<>/g,(function(){return"(?:"+o+"|"+a+")"}))),lookbehind:!0,greedy:!0,alias:"atrule"},directive:{pattern:/(^[ \t]*)%.+/m,lookbehind:!0,alias:"important"},datetime:{pattern:i(/\d{4}-\d\d?-\d\d?(?:[tT]|[ \t]+)\d\d?:\d{2}:\d{2}(?:\.\d*)?(?:[ \t]*(?:Z|[-+]\d\d?(?::\d{2})?))?|\d{4}-\d{2}-\d{2}|\d\d?:\d{2}(?::\d{2}(?:\.\d*)?)?/.source),lookbehind:!0,alias:"number"},boolean:{pattern:i(/false|true/.source,"i"),lookbehind:!0,alias:"important"},null:{pattern:i(/null|~/.source,"i"),lookbehind:!0,alias:"important"},string:{pattern:i(a),lookbehind:!0,greedy:!0},number:{pattern:i(/[+-]?(?:0x[\da-f]+|0o[0-7]+|(?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?|\.inf|\.nan)/.source,"i"),lookbehind:!0},tag:n,important:t,punctuation:/---|[:[\]{}\-,|>?]|\.\.\./},e.languages.yml=e.languages.yaml}(o),function(e){var t=/(?:\\.|[^\\\n\r]|(?:\n|\r\n?)(?![\r\n]))/.source;function n(e){return e=e.replace(//g,(function(){return 
t})),RegExp(/((?:^|[^\\])(?:\\{2})*)/.source+"(?:"+e+")")}var r=/(?:\\.|``(?:[^`\r\n]|`(?!`))+``|`[^`\r\n]+`|[^\\|\r\n`])+/.source,o=/\|?__(?:\|__)+\|?(?:(?:\n|\r\n?)|(?![\s\S]))/.source.replace(/__/g,(function(){return r})),a=/\|?[ \t]*:?-{3,}:?[ \t]*(?:\|[ \t]*:?-{3,}:?[ \t]*)+\|?(?:\n|\r\n?)/.source;e.languages.markdown=e.languages.extend("markup",{}),e.languages.insertBefore("markdown","prolog",{"front-matter-block":{pattern:/(^(?:\s*[\r\n])?)---(?!.)[\s\S]*?[\r\n]---(?!.)/,lookbehind:!0,greedy:!0,inside:{punctuation:/^---|---$/,"front-matter":{pattern:/\S+(?:\s+\S+)*/,alias:["yaml","language-yaml"],inside:e.languages.yaml}}},blockquote:{pattern:/^>(?:[\t ]*>)*/m,alias:"punctuation"},table:{pattern:RegExp("^"+o+a+"(?:"+o+")*","m"),inside:{"table-data-rows":{pattern:RegExp("^("+o+a+")(?:"+o+")*$"),lookbehind:!0,inside:{"table-data":{pattern:RegExp(r),inside:e.languages.markdown},punctuation:/\|/}},"table-line":{pattern:RegExp("^("+o+")"+a+"$"),lookbehind:!0,inside:{punctuation:/\||:?-{3,}:?/}},"table-header-row":{pattern:RegExp("^"+o+"$"),inside:{"table-header":{pattern:RegExp(r),alias:"important",inside:e.languages.markdown},punctuation:/\|/}}}},code:[{pattern:/((?:^|\n)[ \t]*\n|(?:^|\r\n?)[ \t]*\r\n?)(?: {4}|\t).+(?:(?:\n|\r\n?)(?: {4}|\t).+)*/,lookbehind:!0,alias:"keyword"},{pattern:/^```[\s\S]*?^```$/m,greedy:!0,inside:{"code-block":{pattern:/^(```.*(?:\n|\r\n?))[\s\S]+?(?=(?:\n|\r\n?)^```$)/m,lookbehind:!0},"code-language":{pattern:/^(```).+/,lookbehind:!0},punctuation:/```/}}],title:[{pattern:/\S.*(?:\n|\r\n?)(?:==+|--+)(?=[ \t]*$)/m,alias:"important",inside:{punctuation:/==+$|--+$/}},{pattern:/(^\s*)#.+/m,lookbehind:!0,alias:"important",inside:{punctuation:/^#+|#+$/}}],hr:{pattern:/(^\s*)([*-])(?:[\t ]*\2){2,}(?=\s*$)/m,lookbehind:!0,alias:"punctuation"},list:{pattern:/(^\s*)(?:[*+-]|\d+\.)(?=[\t ].)/m,lookbehind:!0,alias:"punctuation"},"url-reference":{pattern:/!?\[[^\]]+\]:[\t ]+(?:\S+|<(?:\\.|[^>\\])+>)(?:[\t 
]+(?:"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|\((?:\\.|[^)\\])*\)))?/,inside:{variable:{pattern:/^(!?\[)[^\]]+/,lookbehind:!0},string:/(?:"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|\((?:\\.|[^)\\])*\))$/,punctuation:/^[\[\]!:]|[<>]/},alias:"url"},bold:{pattern:n(/\b__(?:(?!_)|_(?:(?!_))+_)+__\b|\*\*(?:(?!\*)|\*(?:(?!\*))+\*)+\*\*/.source),lookbehind:!0,greedy:!0,inside:{content:{pattern:/(^..)[\s\S]+(?=..$)/,lookbehind:!0,inside:{}},punctuation:/\*\*|__/}},italic:{pattern:n(/\b_(?:(?!_)|__(?:(?!_))+__)+_\b|\*(?:(?!\*)|\*\*(?:(?!\*))+\*\*)+\*/.source),lookbehind:!0,greedy:!0,inside:{content:{pattern:/(^.)[\s\S]+(?=.$)/,lookbehind:!0,inside:{}},punctuation:/[*_]/}},strike:{pattern:n(/(~~?)(?:(?!~))+\2/.source),lookbehind:!0,greedy:!0,inside:{content:{pattern:/(^~~?)[\s\S]+(?=\1$)/,lookbehind:!0,inside:{}},punctuation:/~~?/}},"code-snippet":{pattern:/(^|[^\\`])(?:``[^`\r\n]+(?:`[^`\r\n]+)*``(?!`)|`[^`\r\n]+`(?!`))/,lookbehind:!0,greedy:!0,alias:["code","keyword"]},url:{pattern:n(/!?\[(?:(?!\]))+\](?:\([^\s)]+(?:[\t ]+"(?:\\.|[^"\\])*")?\)|[ \t]?\[(?:(?!\]))+\])/.source),lookbehind:!0,greedy:!0,inside:{operator:/^!/,content:{pattern:/(^\[)[^\]]+(?=\])/,lookbehind:!0,inside:{}},variable:{pattern:/(^\][ \t]?\[)[^\]]+(?=\]$)/,lookbehind:!0},url:{pattern:/(^\]\()[^\s)]+/,lookbehind:!0},string:{pattern:/(^[ \t]+)"(?:\\.|[^"\\])*"(?=\)$)/,lookbehind:!0}}}}),["url","bold","italic","strike"].forEach((function(t){["url","bold","italic","strike","code-snippet"].forEach((function(n){t!==n&&(e.languages.markdown[t].inside.content.inside[n]=e.languages.markdown[n])}))})),e.hooks.add("after-tokenize",(function(e){"markdown"!==e.language&&"md"!==e.language||function e(t){if(t&&"string"!=typeof t)for(var 
n=0,r=t.length;n",quot:'"'},s=String.fromCodePoint||String.fromCharCode;e.languages.md=e.languages.markdown}(o),o.languages.graphql={comment:/#.*/,description:{pattern:/(?:"""(?:[^"]|(?!""")")*"""|"(?:\\.|[^\\"\r\n])*")(?=\s*[a-z_])/i,greedy:!0,alias:"string",inside:{"language-markdown":{pattern:/(^"(?:"")?)(?!\1)[\s\S]+(?=\1$)/,lookbehind:!0,inside:o.languages.markdown}}},string:{pattern:/"""(?:[^"]|(?!""")")*"""|"(?:\\.|[^\\"\r\n])*"/,greedy:!0},number:/(?:\B-|\b)\d+(?:\.\d+)?(?:e[+-]?\d+)?\b/i,boolean:/\b(?:false|true)\b/,variable:/\$[a-z_]\w*/i,directive:{pattern:/@[a-z_]\w*/i,alias:"function"},"attr-name":{pattern:/\b[a-z_]\w*(?=\s*(?:\((?:[^()"]|"(?:\\.|[^\\"\r\n])*")*\))?:)/i,greedy:!0},"atom-input":{pattern:/\b[A-Z]\w*Input\b/,alias:"class-name"},scalar:/\b(?:Boolean|Float|ID|Int|String)\b/,constant:/\b[A-Z][A-Z_\d]*\b/,"class-name":{pattern:/(\b(?:enum|implements|interface|on|scalar|type|union)\s+|&\s*|:\s*|\[)[A-Z_]\w*/,lookbehind:!0},fragment:{pattern:/(\bfragment\s+|\.{3}\s*(?!on\b))[a-zA-Z_]\w*/,lookbehind:!0,alias:"function"},"definition-mutation":{pattern:/(\bmutation\s+)[a-zA-Z_]\w*/,lookbehind:!0,alias:"function"},"definition-query":{pattern:/(\bquery\s+)[a-zA-Z_]\w*/,lookbehind:!0,alias:"function"},keyword:/\b(?:directive|enum|extend|fragment|implements|input|interface|mutation|on|query|repeatable|scalar|schema|subscription|type|union)\b/,operator:/[!=|&]|\.{3}/,"property-query":/\w+(?=\s*\()/,object:/\w+(?=\s*\{)/,punctuation:/[!(){}\[\]:=,]/,property:/\w+/},o.hooks.add("after-tokenize",(function(e){if("graphql"===e.language)for(var t=e.tokens.filter((function(e){return"string"!=typeof e&&"comment"!==e.type&&"scalar"!==e.type})),n=0;n0)){var l=f(/^\{$/,/^\}$/);if(-1===l)continue;for(var s=n;s=0&&p(u,"variable-input")}}}}function c(e){return t[n+e]}function d(e,t){t=t||0;for(var n=0;n?|<|>)?|>[>=]?|\b(?:AND|BETWEEN|DIV|ILIKE|IN|IS|LIKE|NOT|OR|REGEXP|RLIKE|SOUNDS LIKE|XOR)\b/i,punctuation:/[;[\]()`,.]/},function(e){var 
t=e.languages.javascript["template-string"],n=t.pattern.source,r=t.inside.interpolation,o=r.inside["interpolation-punctuation"],a=r.pattern.source;function i(t,r){if(e.languages[t])return{pattern:RegExp("((?:"+r+")\\s*)"+n),lookbehind:!0,greedy:!0,inside:{"template-punctuation":{pattern:/^`|`$/,alias:"string"},"embedded-code":{pattern:/[\s\S]+/,alias:t}}}}function l(e,t){return"___"+t.toUpperCase()+"_"+e+"___"}function s(t,n,r){var o={code:t,grammar:n,language:r};return e.hooks.run("before-tokenize",o),o.tokens=e.tokenize(o.code,o.grammar),e.hooks.run("after-tokenize",o),o.tokens}function u(t){var n={};n["interpolation-punctuation"]=o;var a=e.tokenize(t,n);if(3===a.length){var i=[1,1];i.push.apply(i,s(a[1],e.languages.javascript,"javascript")),a.splice.apply(a,i)}return new e.Token("interpolation",a,r.alias,t)}function c(t,n,r){var o=e.tokenize(t,{interpolation:{pattern:RegExp(a),lookbehind:!0}}),i=0,c={},d=s(o.map((function(e){if("string"==typeof e)return e;for(var n,o=e.content;-1!==t.indexOf(n=l(i++,r)););return c[n]=o,n})).join(""),n,r),f=Object.keys(c);return i=0,function e(t){for(var n=0;n=f.length)return;var r=t[n];if("string"==typeof r||"string"==typeof r.content){var o=f[i],a="string"==typeof r?r:r.content,l=a.indexOf(o);if(-1!==l){++i;var s=a.substring(0,l),d=u(c[o]),p=a.substring(l+o.length),m=[];if(s&&m.push(s),m.push(d),p){var h=[p];e(h),m.push.apply(m,h)}"string"==typeof r?(t.splice.apply(t,[n,1].concat(m)),n+=m.length-1):r.content=m}}else{var g=r.content;Array.isArray(g)?e(g):e([g])}}}(d),new e.Token(r,d,"language-"+r,t)}e.languages.javascript["template-string"]=[i("css",/\b(?:styled(?:\([^)]*\))?(?:\s*\.\s*\w+(?:\([^)]*\))*)*|css(?:\s*\.\s*(?:global|resolve))?|createGlobalStyle|keyframes)/.source),i("html",/\bhtml|\.\s*(?:inner|outer)HTML\s*\+?=/.source),i("svg",/\bsvg/.source),i("markdown",/\b(?:markdown|md)/.source),i("graphql",/\b(?:gql|graphql(?:\s*\.\s*experimental)?)/.source),i("sql",/\bsql/.source),t].filter(Boolean);var 
d={javascript:!0,js:!0,typescript:!0,ts:!0,jsx:!0,tsx:!0};function f(e){return"string"==typeof e?e:Array.isArray(e)?e.map(f).join(""):f(e.content)}e.hooks.add("after-tokenize",(function(t){t.language in d&&function t(n){for(var r=0,o=n.length;r]|<(?:[^<>]|<[^<>]*>)*>)*>)?/,lookbehind:!0,greedy:!0,inside:null},builtin:/\b(?:Array|Function|Promise|any|boolean|console|never|number|string|symbol|unknown)\b/}),e.languages.typescript.keyword.push(/\b(?:abstract|declare|is|keyof|readonly|require)\b/,/\b(?:asserts|infer|interface|module|namespace|type)\b(?=\s*(?:[{_$a-zA-Z\xA0-\uFFFF]|$))/,/\btype\b(?=\s*(?:[\{*]|$))/),delete e.languages.typescript.parameter,delete e.languages.typescript["literal-property"];var t=e.languages.extend("typescript",{});delete t["class-name"],e.languages.typescript["class-name"].inside=t,e.languages.insertBefore("typescript","function",{decorator:{pattern:/@[$\w\xA0-\uFFFF]+/,inside:{at:{pattern:/^@/,alias:"operator"},function:/^[\s\S]+/}},"generic-function":{pattern:/#?(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*\s*<(?:[^<>]|<(?:[^<>]|<[^<>]*>)*>)*>(?=\s*\()/,greedy:!0,inside:{function:/^#?(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*/,generic:{pattern:/<[\s\S]+/,alias:"class-name",inside:t}}}}),e.languages.ts=e.languages.typescript}(o),function(e){function t(e,t){return 
RegExp(e.replace(//g,(function(){return/(?!\s)[_$a-zA-Z\xA0-\uFFFF](?:(?!\s)[$\w\xA0-\uFFFF])*/.source})),t)}e.languages.insertBefore("javascript","function-variable",{"method-variable":{pattern:RegExp("(\\.\\s*)"+e.languages.javascript["function-variable"].pattern.source),lookbehind:!0,alias:["function-variable","method","function","property-access"]}}),e.languages.insertBefore("javascript","function",{method:{pattern:RegExp("(\\.\\s*)"+e.languages.javascript.function.source),lookbehind:!0,alias:["function","property-access"]}}),e.languages.insertBefore("javascript","constant",{"known-class-name":[{pattern:/\b(?:(?:Float(?:32|64)|(?:Int|Uint)(?:8|16|32)|Uint8Clamped)?Array|ArrayBuffer|BigInt|Boolean|DataView|Date|Error|Function|Intl|JSON|(?:Weak)?(?:Map|Set)|Math|Number|Object|Promise|Proxy|Reflect|RegExp|String|Symbol|WebAssembly)\b/,alias:"class-name"},{pattern:/\b(?:[A-Z]\w*)Error\b/,alias:"class-name"}]}),e.languages.insertBefore("javascript","keyword",{imports:{pattern:t(/(\bimport\b\s*)(?:(?:\s*,\s*(?:\*\s*as\s+|\{[^{}]*\}))?|\*\s*as\s+|\{[^{}]*\})(?=\s*\bfrom\b)/.source),lookbehind:!0,inside:e.languages.javascript},exports:{pattern:t(/(\bexport\b\s*)(?:\*(?:\s*as\s+)?(?=\s*\bfrom\b)|\{[^{}]*\})/.source),lookbehind:!0,inside:e.languages.javascript}}),e.languages.javascript.keyword.unshift({pattern:/\b(?:as|default|export|from|import)\b/,alias:"module"},{pattern:/\b(?:await|break|catch|continue|do|else|finally|for|if|return|switch|throw|try|while|yield)\b/,alias:"control-flow"},{pattern:/\bnull\b/,alias:["null","nil"]},{pattern:/\bundefined\b/,alias:"nil"}),e.languages.insertBefore("javascript","operator",{spread:{pattern:/\.{3}/,alias:"operator"},arrow:{pattern:/=>/,alias:"operator"}}),e.languages.insertBefore("javascript","punctuation",{"property-access":{pattern:t(/(\.\s*)#?/.source),lookbehind:!0},"maybe-class-name":{pattern:/(^|[^$\w\xA0-\uFFFF])[A-Z][$\w\xA0-\uFFFF]+/,lookbehind:!0},dom:{pattern:/\b(?:document|(?:local|session)Storage|location|navigator|
performance|window)\b/,alias:"variable"},console:{pattern:/\bconsole(?=\s*\.)/,alias:"class-name"}});for(var n=["function","function-variable","method","method-variable","property-access"],r=0;r*\.{3}(?:[^{}]|)*\})/.source;function a(e,t){return e=e.replace(//g,(function(){return n})).replace(//g,(function(){return r})).replace(//g,(function(){return o})),RegExp(e,t)}o=a(o).source,e.languages.jsx=e.languages.extend("markup",t),e.languages.jsx.tag.pattern=a(/<\/?(?:[\w.:-]+(?:+(?:[\w.:$-]+(?:=(?:"(?:\\[\s\S]|[^\\"])*"|'(?:\\[\s\S]|[^\\'])*'|[^\s{'"/>=]+|))?|))**\/?)?>/.source),e.languages.jsx.tag.inside.tag.pattern=/^<\/?[^\s>\/]*/,e.languages.jsx.tag.inside["attr-value"].pattern=/=(?!\{)(?:"(?:\\[\s\S]|[^\\"])*"|'(?:\\[\s\S]|[^\\'])*'|[^\s'">]+)/,e.languages.jsx.tag.inside.tag.inside["class-name"]=/^[A-Z]\w*(?:\.[A-Z]\w*)*$/,e.languages.jsx.tag.inside.comment=t.comment,e.languages.insertBefore("inside","attr-name",{spread:{pattern:a(//.source),inside:e.languages.jsx}},e.languages.jsx.tag),e.languages.insertBefore("inside","special-attr",{script:{pattern:a(/=/.source),alias:"language-javascript",inside:{"script-punctuation":{pattern:/^=(?=\{)/,alias:"punctuation"},rest:e.languages.jsx}}},e.languages.jsx.tag);var i=function(e){return e?"string"==typeof e?e:"string"==typeof e.content?e.content:e.content.map(i).join(""):""},l=function(t){for(var n=[],r=0;r0&&n[n.length-1].tagName===i(o.content[0].content[1])&&n.pop():"/>"===o.content[o.content.length-1].content||n.push({tagName:i(o.content[0].content[1]),openedBraces:0}):n.length>0&&"punctuation"===o.type&&"{"===o.content?n[n.length-1].openedBraces++:n.length>0&&n[n.length-1].openedBraces>0&&"punctuation"===o.type&&"}"===o.content?n[n.length-1].openedBraces--:a=!0),(a||"string"==typeof o)&&n.length>0&&0===n[n.length-1].openedBraces){var s=i(o);r0&&("string"==typeof t[r-1]||"plain-text"===t[r-1].type)&&(s=i(t[r-1])+s,t.splice(r-1,1),r--),t[r]=new e.Token("plain-text",s,null,s)}o.content&&"string"!=typeof 
o.content&&l(o.content)}};e.hooks.add("after-tokenize",(function(e){"jsx"!==e.language&&"tsx"!==e.language||l(e.tokens)}))}(o),function(e){e.languages.diff={coord:[/^(?:\*{3}|-{3}|\+{3}).*$/m,/^@@.*@@$/m,/^\d.*$/m]};var t={"deleted-sign":"-","deleted-arrow":"<","inserted-sign":"+","inserted-arrow":">",unchanged:" ",diff:"!"};Object.keys(t).forEach((function(n){var r=t[n],o=[];/^\w+$/.test(n)||o.push(/\w+/.exec(n)[0]),"diff"===n&&o.push("bold"),e.languages.diff[n]={pattern:RegExp("^(?:["+r+"].*(?:\r\n?|\n|(?![\\s\\S])))+","m"),alias:o,inside:{line:{pattern:/(.)(?=[\s\S]).*(?:\r\n?|\n)?/,lookbehind:!0},prefix:{pattern:/[\s\S]/,alias:/\w+/.exec(n)[0]}}}})),Object.defineProperty(e.languages.diff,"PREFIXES",{value:t})}(o),o.languages.git={comment:/^#.*/m,deleted:/^[-\u2013].*/m,inserted:/^\+.*/m,string:/("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,command:{pattern:/^.*\$ git .*$/m,inside:{parameter:/\s--?\w+/}},coord:/^@@.*@@$/m,"commit-sha1":/^commit \w{40}$/m},o.languages.go=o.languages.extend("clike",{string:{pattern:/(^|[^\\])"(?:\\.|[^"\\\r\n])*"|`[^`]*`/,lookbehind:!0,greedy:!0},keyword:/\b(?:break|case|chan|const|continue|default|defer|else|fallthrough|for|func|go(?:to)?|if|import|interface|map|package|range|return|select|struct|switch|type|var)\b/,boolean:/\b(?:_|false|iota|nil|true)\b/,number:[/\b0(?:b[01_]+|o[0-7_]+)i?\b/i,/\b0x(?:[a-f\d_]+(?:\.[a-f\d_]*)?|\.[a-f\d_]+)(?:p[+-]?\d+(?:_\d+)*)?i?(?!\w)/i,/(?:\b\d[\d_]*(?:\.[\d_]*)?|\B\.\d[\d_]*)(?:e[+-]?[\d_]+)?i?(?!\w)/i],operator:/[*\/%^!=]=?|\+[=+]?|-[=-]?|\|[=|]?|&(?:=|&|\^=?)?|>(?:>=?|=)?|<(?:<=?|=|-)?|:=|\.\.\./,builtin:/\b(?:append|bool|byte|cap|close|complex|complex(?:64|128)|copy|delete|error|float(?:32|64)|u?int(?:8|16|32|64)?|imag|len|make|new|panic|print(?:ln)?|real|recover|rune|string|uintptr)\b/}),o.languages.insertBefore("go","string",{char:{pattern:/'(?:\\.|[^'\\\r\n]){0,10}'/,greedy:!0}}),delete o.languages.go["class-name"],function(e){function 
t(e,t){return"___"+e.toUpperCase()+t+"___"}Object.defineProperties(e.languages["markup-templating"]={},{buildPlaceholders:{value:function(n,r,o,a){if(n.language===r){var i=n.tokenStack=[];n.code=n.code.replace(o,(function(e){if("function"==typeof a&&!a(e))return e;for(var o,l=i.length;-1!==n.code.indexOf(o=t(r,l));)++l;return i[l]=e,o})),n.grammar=e.languages.markup}}},tokenizePlaceholders:{value:function(n,r){if(n.language===r&&n.tokenStack){n.grammar=e.languages[r];var o=0,a=Object.keys(n.tokenStack);!function i(l){for(var s=0;s=a.length);s++){var u=l[s];if("string"==typeof u||u.content&&"string"==typeof u.content){var c=a[o],d=n.tokenStack[c],f="string"==typeof u?u:u.content,p=t(r,c),m=f.indexOf(p);if(m>-1){++o;var h=f.substring(0,m),g=new e.Token(r,e.tokenize(d,n.grammar),"language-"+r,d),v=f.substring(m+p.length),b=[];h&&b.push.apply(b,i([h])),b.push(g),v&&b.push.apply(b,i([v])),"string"==typeof u?l.splice.apply(l,[s,1].concat(b)):u.content=b}}else u.content&&i(u.content)}return l}(n.tokens)}}}})}(o),function(e){e.languages.handlebars={comment:/\{\{![\s\S]*?\}\}/,delimiter:{pattern:/^\{\{\{?|\}\}\}?$/,alias:"punctuation"},string:/(["'])(?:\\.|(?!\1)[^\\\r\n])*\1/,number:/\b0x[\dA-Fa-f]+\b|(?:\b\d+(?:\.\d*)?|\B\.\d+)(?:[Ee][+-]?\d+)?/,boolean:/\b(?:false|true)\b/,block:{pattern:/^(\s*(?:~\s*)?)[#\/]\S+?(?=\s*(?:~\s*)?$|\s)/,lookbehind:!0,alias:"keyword"},brackets:{pattern:/\[[^\]]+\]/,inside:{punctuation:/\[|\]/,variable:/[\s\S]+/}},punctuation:/[!"#%&':()*+,.\/;<=>@\[\\\]^`{|}~]/,variable:/[^!"#%&'()*+,\/;<=>@\[\\\]^`{|}~\s]+/},e.hooks.add("before-tokenize",(function(t){e.languages["markup-templating"].buildPlaceholders(t,"handlebars",/\{\{\{[\s\S]+?\}\}\}|\{\{[\s\S]+?\}\}/g)})),e.hooks.add("after-tokenize",(function(t){e.languages["markup-templating"].tokenizePlaceholders(t,"handlebars")})),e.languages.hbs=e.languages.handlebars}(o),o.languages.json={property:{pattern:/(^|[^\\])"(?:\\.|[^\\"\r\n])*"(?=\s*:)/,lookbehind:!0,greedy:!0},string:{pattern:/(^|[^\\])"
(?:\\.|[^\\"\r\n])*"(?!\s*:)/,lookbehind:!0,greedy:!0},comment:{pattern:/\/\/.*|\/\*[\s\S]*?(?:\*\/|$)/,greedy:!0},number:/-?\b\d+(?:\.\d+)?(?:e[+-]?\d+)?\b/i,punctuation:/[{}[\],]/,operator:/:/,boolean:/\b(?:false|true)\b/,null:{pattern:/\bnull\b/,alias:"keyword"}},o.languages.webmanifest=o.languages.json,o.languages.less=o.languages.extend("css",{comment:[/\/\*[\s\S]*?\*\//,{pattern:/(^|[^\\])\/\/.*/,lookbehind:!0}],atrule:{pattern:/@[\w-](?:\((?:[^(){}]|\([^(){}]*\))*\)|[^(){};\s]|\s+(?!\s))*?(?=\s*\{)/,inside:{punctuation:/[:()]/}},selector:{pattern:/(?:@\{[\w-]+\}|[^{};\s@])(?:@\{[\w-]+\}|\((?:[^(){}]|\([^(){}]*\))*\)|[^(){};@\s]|\s+(?!\s))*?(?=\s*\{)/,inside:{variable:/@+[\w-]+/}},property:/(?:@\{[\w-]+\}|[\w-])+(?:\+_?)?(?=\s*:)/,operator:/[+\-*\/]/}),o.languages.insertBefore("less","property",{variable:[{pattern:/@[\w-]+\s*:/,inside:{punctuation:/:/}},/@@?[\w-]+/],"mixin-usage":{pattern:/([{;]\s*)[.#](?!\d)[\w-].*?(?=[(;])/,lookbehind:!0,alias:"function"}}),o.languages.makefile={comment:{pattern:/(^|[^\\])#(?:\\(?:\r\n|[\s\S])|[^\\\r\n])*/,lookbehind:!0},string:{pattern:/(["'])(?:\\(?:\r\n|[\s\S])|(?!\1)[^\\\r\n])*\1/,greedy:!0},"builtin-target":{pattern:/\.[A-Z][^:#=\s]+(?=\s*:(?!=))/,alias:"builtin"},target:{pattern:/^(?:[^:=\s]|[ \t]+(?![\s:]))+(?=\s*:(?!=))/m,alias:"symbol",inside:{variable:/\$+(?:(?!\$)[^(){}:#=\s]+|(?=[({]))/}},variable:/\$+(?:(?!\$)[^(){}:#=\s]+|\([@*%<^+?][DF]\)|(?=[({]))/,keyword:/-include\b|\b(?:define|else|endef|endif|export|ifn?def|ifn?eq|include|override|private|sinclude|undefine|unexport|vpath)\b/,function:{pattern:/(\()(?:abspath|addsuffix|and|basename|call|dir|error|eval|file|filter(?:-out)?|findstring|firstword|flavor|foreach|guile|if|info|join|lastword|load|notdir|or|origin|patsubst|realpath|shell|sort|strip|subst|suffix|value|warning|wildcard|word(?:list|s)?)(?=[ 
\t])/,lookbehind:!0},operator:/(?:::|[?:+!])?=|[|@]/,punctuation:/[:;(){}]/},o.languages.objectivec=o.languages.extend("c",{string:{pattern:/@?"(?:\\(?:\r\n|[\s\S])|[^"\\\r\n])*"/,greedy:!0},keyword:/\b(?:asm|auto|break|case|char|const|continue|default|do|double|else|enum|extern|float|for|goto|if|in|inline|int|long|register|return|self|short|signed|sizeof|static|struct|super|switch|typedef|typeof|union|unsigned|void|volatile|while)\b|(?:@interface|@end|@implementation|@protocol|@class|@public|@protected|@private|@property|@try|@catch|@finally|@throw|@synthesize|@dynamic|@selector)\b/,operator:/-[->]?|\+\+?|!=?|<>?=?|==?|&&?|\|\|?|[~^%?*\/@]/}),delete o.languages.objectivec["class-name"],o.languages.objc=o.languages.objectivec,o.languages.ocaml={comment:{pattern:/\(\*[\s\S]*?\*\)/,greedy:!0},char:{pattern:/'(?:[^\\\r\n']|\\(?:.|[ox]?[0-9a-f]{1,3}))'/i,greedy:!0},string:[{pattern:/"(?:\\(?:[\s\S]|\r\n)|[^\\\r\n"])*"/,greedy:!0},{pattern:/\{([a-z_]*)\|[\s\S]*?\|\1\}/,greedy:!0}],number:[/\b(?:0b[01][01_]*|0o[0-7][0-7_]*)\b/i,/\b0x[a-f0-9][a-f0-9_]*(?:\.[a-f0-9_]*)?(?:p[+-]?\d[\d_]*)?(?!\w)/i,/\b\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?\d[\d_]*)?(?!\w)/i],directive:{pattern:/\B#\w+/,alias:"property"},label:{pattern:/\B~\w+/,alias:"property"},"type-variable":{pattern:/\B'\w+/,alias:"function"},variant:{pattern:/`\w+/,alias:"symbol"},keyword:/\b(?:as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|nonrec|object|of|open|private|rec|sig|struct|then|to|try|type|val|value|virtual|when|where|while|with)\b/,boolean:/\b(?:false|true)\b/,"operator-like-punctuation":{pattern:/\[[<>|]|[>|]\]|\{<|>\}/,alias:"punctuation"},operator:/\.[.~]|:[=>]|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,punctuation:/;;|::|[(){}\[\].,:;#]|\b_\b/},o.languages.python={comment:{pattern:/(^|[^\\])#.*/,lookbehind:!0,greedy:!0},"string-interpolatio
n":{pattern:/(?:f|fr|rf)(?:("""|''')[\s\S]*?\1|("|')(?:\\.|(?!\2)[^\\\r\n])*\2)/i,greedy:!0,inside:{interpolation:{pattern:/((?:^|[^{])(?:\{\{)*)\{(?!\{)(?:[^{}]|\{(?!\{)(?:[^{}]|\{(?!\{)(?:[^{}])+\})+\})+\}/,lookbehind:!0,inside:{"format-spec":{pattern:/(:)[^:(){}]+(?=\}$)/,lookbehind:!0},"conversion-option":{pattern:/![sra](?=[:}]$)/,alias:"punctuation"},rest:null}},string:/[\s\S]+/}},"triple-quoted-string":{pattern:/(?:[rub]|br|rb)?("""|''')[\s\S]*?\1/i,greedy:!0,alias:"string"},string:{pattern:/(?:[rub]|br|rb)?("|')(?:\\.|(?!\1)[^\\\r\n])*\1/i,greedy:!0},function:{pattern:/((?:^|\s)def[ \t]+)[a-zA-Z_]\w*(?=\s*\()/g,lookbehind:!0},"class-name":{pattern:/(\bclass\s+)\w+/i,lookbehind:!0},decorator:{pattern:/(^[\t ]*)@\w+(?:\.\w+)*/m,lookbehind:!0,alias:["annotation","punctuation"],inside:{punctuation:/\./}},keyword:/\b(?:_(?=\s*:)|and|as|assert|async|await|break|case|class|continue|def|del|elif|else|except|exec|finally|for|from|global|if|import|in|is|lambda|match|nonlocal|not|or|pass|print|raise|return|try|while|with|yield)\b/,builtin:/\b(?:__import__|abs|all|any|apply|ascii|basestring|bin|bool|buffer|bytearray|bytes|callable|chr|classmethod|cmp|coerce|compile|complex|delattr|dict|dir|divmod|enumerate|eval|execfile|file|filter|float|format|frozenset|getattr|globals|hasattr|hash|help|hex|id|input|int|intern|isinstance|issubclass|iter|len|list|locals|long|map|max|memoryview|min|next|object|oct|open|ord|pow|property|range|raw_input|reduce|reload|repr|reversed|round|set|setattr|slice|sorted|staticmethod|str|sum|super|tuple|type|unichr|unicode|vars|xrange|zip)\b/,boolean:/\b(?:False|None|True)\b/,number:/\b0(?:b(?:_?[01])+|o(?:_?[0-7])+|x(?:_?[a-f0-9])+)\b|(?:\b\d+(?:_\d+)*(?:\.(?:\d+(?:_\d+)*)?)?|\B\.\d+(?:_\d+)*)(?:e[+-]?\d+(?:_\d+)*)?j?(?!\w)/i,operator:/[-+%=]=?|!=|:=|\*\*?=?|\/\/?=?|<[<=>]?|>[=>]?|[&|^~]/,punctuation:/[{}[\];(),.:]/},o.languages.python["string-interpolation"].inside.interpolation.inside.rest=o.languages.python,o.languages.py=o.languages.python,o.la
nguages.reason=o.languages.extend("clike",{string:{pattern:/"(?:\\(?:\r\n|[\s\S])|[^\\\r\n"])*"/,greedy:!0},"class-name":/\b[A-Z]\w*/,keyword:/\b(?:and|as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|method|module|mutable|new|nonrec|object|of|open|or|private|rec|sig|struct|switch|then|to|try|type|val|virtual|when|while|with)\b/,operator:/\.{3}|:[:=]|\|>|->|=(?:==?|>)?|<=?|>=?|[|^?'#!~`]|[+\-*\/]\.?|\b(?:asr|land|lor|lsl|lsr|lxor|mod)\b/}),o.languages.insertBefore("reason","class-name",{char:{pattern:/'(?:\\x[\da-f]{2}|\\o[0-3][0-7][0-7]|\\\d{3}|\\.|[^'\\\r\n])'/,greedy:!0},constructor:/\b[A-Z]\w*\b(?!\s*\.)/,label:{pattern:/\b[a-z]\w*(?=::)/,alias:"symbol"}}),delete o.languages.reason.function,function(e){e.languages.sass=e.languages.extend("css",{comment:{pattern:/^([ \t]*)\/[\/*].*(?:(?:\r?\n|\r)\1[ \t].+)*/m,lookbehind:!0,greedy:!0}}),e.languages.insertBefore("sass","atrule",{"atrule-line":{pattern:/^(?:[ \t]*)[@+=].+/m,greedy:!0,inside:{atrule:/(?:@[\w-]+|[+=])/}}}),delete e.languages.sass.atrule;var t=/\$[-\w]+|#\{\$[-\w]+\}/,n=[/[+*\/%]|[=!]=|<=?|>=?|\b(?:and|not|or)\b/,{pattern:/(\s)-(?=\s)/,lookbehind:!0}];e.languages.insertBefore("sass","property",{"variable-line":{pattern:/^[ \t]*\$.+/m,greedy:!0,inside:{punctuation:/:/,variable:t,operator:n}},"property-line":{pattern:/^[ \t]*(?:[^:\s]+ *:.*|:[^:\s].*)/m,greedy:!0,inside:{property:[/[^:\s]+(?=\s*:)/,{pattern:/(:)[^:\s]+/,lookbehind:!0}],punctuation:/:/,variable:t,operator:n,important:e.languages.sass.important}}}),delete e.languages.sass.property,delete e.languages.sass.important,e.languages.insertBefore("sass","punctuation",{selector:{pattern:/^([ \t]*)\S(?:,[^,\r\n]+|[^,\r\n]*)(?:,[^,\r\n]+)*(?:,(?:\r?\n|\r)\1[ 
\t]+\S(?:,[^,\r\n]+|[^,\r\n]*)(?:,[^,\r\n]+)*)*/m,lookbehind:!0,greedy:!0}})}(o),o.languages.scss=o.languages.extend("css",{comment:{pattern:/(^|[^\\])(?:\/\*[\s\S]*?\*\/|\/\/.*)/,lookbehind:!0},atrule:{pattern:/@[\w-](?:\([^()]+\)|[^()\s]|\s+(?!\s))*?(?=\s+[{;])/,inside:{rule:/@[\w-]+/}},url:/(?:[-a-z]+-)?url(?=\()/i,selector:{pattern:/(?=\S)[^@;{}()]?(?:[^@;{}()\s]|\s+(?!\s)|#\{\$[-\w]+\})+(?=\s*\{(?:\}|\s|[^}][^:{}]*[:{][^}]))/,inside:{parent:{pattern:/&/,alias:"important"},placeholder:/%[-\w]+/,variable:/\$[-\w]+|#\{\$[-\w]+\}/}},property:{pattern:/(?:[-\w]|\$[-\w]|#\{\$[-\w]+\})+(?=\s*:)/,inside:{variable:/\$[-\w]+|#\{\$[-\w]+\}/}}}),o.languages.insertBefore("scss","atrule",{keyword:[/@(?:content|debug|each|else(?: if)?|extend|for|forward|function|if|import|include|mixin|return|use|warn|while)\b/i,{pattern:/( )(?:from|through)(?= )/,lookbehind:!0}]}),o.languages.insertBefore("scss","important",{variable:/\$[-\w]+|#\{\$[-\w]+\}/}),o.languages.insertBefore("scss","function",{"module-modifier":{pattern:/\b(?:as|hide|show|with)\b/i,alias:"keyword"},placeholder:{pattern:/%[-\w]+/,alias:"selector"},statement:{pattern:/\B!(?:default|optional)\b/i,alias:"keyword"},boolean:/\b(?:false|true)\b/,null:{pattern:/\bnull\b/,alias:"keyword"},operator:{pattern:/(\s)(?:[-+*\/%]|[=!]=|<=?|>=?|and|not|or)(?=\s)/,lookbehind:!0}}),o.languages.scss.atrule.inside.rest=o.languages.scss,function(e){var 
t={pattern:/(\b\d+)(?:%|[a-z]+)/,lookbehind:!0},n={pattern:/(^|[^\w.-])-?(?:\d+(?:\.\d+)?|\.\d+)/,lookbehind:!0},r={comment:{pattern:/(^|[^\\])(?:\/\*[\s\S]*?\*\/|\/\/.*)/,lookbehind:!0},url:{pattern:/\burl\((["']?).*?\1\)/i,greedy:!0},string:{pattern:/("|')(?:(?!\1)[^\\\r\n]|\\(?:\r\n|[\s\S]))*\1/,greedy:!0},interpolation:null,func:null,important:/\B!(?:important|optional)\b/i,keyword:{pattern:/(^|\s+)(?:(?:else|for|if|return|unless)(?=\s|$)|@[\w-]+)/,lookbehind:!0},hexcode:/#[\da-f]{3,6}/i,color:[/\b(?:AliceBlue|AntiqueWhite|Aqua|Aquamarine|Azure|Beige|Bisque|Black|BlanchedAlmond|Blue|BlueViolet|Brown|BurlyWood|CadetBlue|Chartreuse|Chocolate|Coral|CornflowerBlue|Cornsilk|Crimson|Cyan|DarkBlue|DarkCyan|DarkGoldenRod|DarkGr[ae]y|DarkGreen|DarkKhaki|DarkMagenta|DarkOliveGreen|DarkOrange|DarkOrchid|DarkRed|DarkSalmon|DarkSeaGreen|DarkSlateBlue|DarkSlateGr[ae]y|DarkTurquoise|DarkViolet|DeepPink|DeepSkyBlue|DimGr[ae]y|DodgerBlue|FireBrick|FloralWhite|ForestGreen|Fuchsia|Gainsboro|GhostWhite|Gold|GoldenRod|Gr[ae]y|Green|GreenYellow|HoneyDew|HotPink|IndianRed|Indigo|Ivory|Khaki|Lavender|LavenderBlush|LawnGreen|LemonChiffon|LightBlue|LightCoral|LightCyan|LightGoldenRodYellow|LightGr[ae]y|LightGreen|LightPink|LightSalmon|LightSeaGreen|LightSkyBlue|LightSlateGr[ae]y|LightSteelBlue|LightYellow|Lime|LimeGreen|Linen|Magenta|Maroon|MediumAquaMarine|MediumBlue|MediumOrchid|MediumPurple|MediumSeaGreen|MediumSlateBlue|MediumSpringGreen|MediumTurquoise|MediumVioletRed|MidnightBlue|MintCream|MistyRose|Moccasin|NavajoWhite|Navy|OldLace|Olive|OliveDrab|Orange|OrangeRed|Orchid|PaleGoldenRod|PaleGreen|PaleTurquoise|PaleVioletRed|PapayaWhip|PeachPuff|Peru|Pink|Plum|PowderBlue|Purple|Red|RosyBrown|RoyalBlue|SaddleBrown|Salmon|SandyBrown|SeaGreen|SeaShell|Sienna|Silver|SkyBlue|SlateBlue|SlateGr[ae]y|Snow|SpringGreen|SteelBlue|Tan|Teal|Thistle|Tomato|Transparent|Turquoise|Violet|Wheat|White|WhiteSmoke|Yellow|YellowGreen)\b/i,{pattern:/\b(?:hsl|rgb)\(\s*\d{1,3}\s*,\s*\d{1,3}%?\s*,\s*\d{1,3}%?
\s*\)\B|\b(?:hsl|rgb)a\(\s*\d{1,3}\s*,\s*\d{1,3}%?\s*,\s*\d{1,3}%?\s*,\s*(?:0|0?\.\d+|1)\s*\)\B/i,inside:{unit:t,number:n,function:/[\w-]+(?=\()/,punctuation:/[(),]/}}],entity:/\\[\da-f]{1,8}/i,unit:t,boolean:/\b(?:false|true)\b/,operator:[/~|[+!\/%<>?=]=?|[-:]=|\*[*=]?|\.{2,3}|&&|\|\||\B-\B|\b(?:and|in|is(?: a| defined| not|nt)?|not|or)\b/],number:n,punctuation:/[{}()\[\];:,]/};r.interpolation={pattern:/\{[^\r\n}:]+\}/,alias:"variable",inside:{delimiter:{pattern:/^\{|\}$/,alias:"punctuation"},rest:r}},r.func={pattern:/[\w-]+\([^)]*\).*/,inside:{function:/^[^(]+/,rest:r}},e.languages.stylus={"atrule-declaration":{pattern:/(^[ \t]*)@.+/m,lookbehind:!0,inside:{atrule:/^@[\w-]+/,rest:r}},"variable-declaration":{pattern:/(^[ \t]*)[\w$-]+\s*.?=[ \t]*(?:\{[^{}]*\}|\S.*|$)/m,lookbehind:!0,inside:{variable:/^\S+/,rest:r}},statement:{pattern:/(^[ \t]*)(?:else|for|if|return|unless)[ \t].+/m,lookbehind:!0,inside:{keyword:/^\S+/,rest:r}},"property-declaration":{pattern:/((?:^|\{)([ \t]*))(?:[\w-]|\{[^}\r\n]+\})+(?:\s*:\s*|[ \t]+)(?!\s)[^{\r\n]*(?:;|[^{\r\n,]$(?!(?:\r?\n|\r)(?:\{|\2[ \t])))/m,lookbehind:!0,inside:{property:{pattern:/^[^\s:]+/,inside:{interpolation:r.interpolation}},rest:r}},selector:{pattern:/(^[ \t]*)(?:(?=\S)(?:[^{}\r\n:()]|::?[\w-]+(?:\([^)\r\n]*\)|(?![\w-]))|\{[^}\r\n]+\})+)(?:(?:\r?\n|\r)(?:\1(?:(?=\S)(?:[^{}\r\n:()]|::?[\w-]+(?:\([^)\r\n]*\)|(?![\w-]))|\{[^}\r\n]+\})+)))*(?:,$|\{|(?=(?:\r?\n|\r)(?:\{|\1[ \t])))/m,lookbehind:!0,inside:{interpolation:r.interpolation,comment:r.comment,punctuation:/[{},]/}},func:r.func,string:r.string,comment:{pattern:/(^|[^\\])(?:\/\*[\s\S]*?\*\/|\/\/.*)/,lookbehind:!0,greedy:!0},interpolation:r.interpolation,punctuation:/[{}()\[\];:.]/}}(o),function(e){var t=e.util.clone(e.languages.typescript);e.languages.tsx=e.languages.extend("jsx",t),delete e.languages.tsx.parameter,delete e.languages.tsx["literal-property"];var 
n=e.languages.tsx.tag;n.pattern=RegExp(/(^|[^\w$]|(?=<\/))/.source+"(?:"+n.pattern.source+")",n.pattern.flags),n.lookbehind=!0}(o),o.languages.wasm={comment:[/\(;[\s\S]*?;\)/,{pattern:/;;.*/,greedy:!0}],string:{pattern:/"(?:\\[\s\S]|[^"\\])*"/,greedy:!0},keyword:[{pattern:/\b(?:align|offset)=/,inside:{operator:/=/}},{pattern:/\b(?:(?:f32|f64|i32|i64)(?:\.(?:abs|add|and|ceil|clz|const|convert_[su]\/i(?:32|64)|copysign|ctz|demote\/f64|div(?:_[su])?|eqz?|extend_[su]\/i32|floor|ge(?:_[su])?|gt(?:_[su])?|le(?:_[su])?|load(?:(?:8|16|32)_[su])?|lt(?:_[su])?|max|min|mul|neg?|nearest|or|popcnt|promote\/f32|reinterpret\/[fi](?:32|64)|rem_[su]|rot[lr]|shl|shr_[su]|sqrt|store(?:8|16|32)?|sub|trunc(?:_[su]\/f(?:32|64))?|wrap\/i64|xor))?|memory\.(?:grow|size))\b/,inside:{punctuation:/\./}},/\b(?:anyfunc|block|br(?:_if|_table)?|call(?:_indirect)?|data|drop|elem|else|end|export|func|get_(?:global|local)|global|if|import|local|loop|memory|module|mut|nop|offset|param|result|return|select|set_(?:global|local)|start|table|tee_local|then|type|unreachable)\b/],variable:/\$[\w!#$%&'*+\-./:<=>?@\\^`|~]+/,number:/[+-]?\b(?:\d(?:_?\d)*(?:\.\d(?:_?\d)*)?(?:[eE][+-]?\d(?:_?\d)*)?|0x[\da-fA-F](?:_?[\da-fA-F])*(?:\.[\da-fA-F](?:_?[\da-fA-D])*)?(?:[pP][+-]?\d(?:_?\d)*)?)\b|\binf\b|\bnan(?::0x[\da-fA-F](?:_?[\da-fA-D])*)?\b/,punctuation:/[()]/};const a=o},9901:e=>{e.exports&&(e.exports={core:{meta:{path:"components/prism-core.js",option:"mandatory"},core:"Core"},themes:{meta:{path:"themes/{id}.css",link:"index.html?theme={id}",exclusive:!0},prism:{title:"Default",option:"default"},"prism-dark":"Dark","prism-funky":"Funky","prism-okaidia":{title:"Okaidia",owner:"ocodia"},"prism-twilight":{title:"Twilight",owner:"remybach"},"prism-coy":{title:"Coy",owner:"tshedor"},"prism-solarizedlight":{title:"Solarized Light",owner:"hectormatos2011 "},"prism-tomorrow":{title:"Tomorrow 
Night",owner:"Rosey"}},languages:{meta:{path:"components/prism-{id}",noCSS:!0,examplesPath:"examples/prism-{id}",addCheckAll:!0},markup:{title:"Markup",alias:["html","xml","svg","mathml","ssml","atom","rss"],aliasTitles:{html:"HTML",xml:"XML",svg:"SVG",mathml:"MathML",ssml:"SSML",atom:"Atom",rss:"RSS"},option:"default"},css:{title:"CSS",option:"default",modify:"markup"},clike:{title:"C-like",option:"default"},javascript:{title:"JavaScript",require:"clike",modify:"markup",optional:"regex",alias:"js",option:"default"},abap:{title:"ABAP",owner:"dellagustin"},abnf:{title:"ABNF",owner:"RunDevelopment"},actionscript:{title:"ActionScript",require:"javascript",modify:"markup",owner:"Golmote"},ada:{title:"Ada",owner:"Lucretia"},agda:{title:"Agda",owner:"xy-ren"},al:{title:"AL",owner:"RunDevelopment"},antlr4:{title:"ANTLR4",alias:"g4",owner:"RunDevelopment"},apacheconf:{title:"Apache Configuration",owner:"GuiTeK"},apex:{title:"Apex",require:["clike","sql"],owner:"RunDevelopment"},apl:{title:"APL",owner:"ngn"},applescript:{title:"AppleScript",owner:"Golmote"},aql:{title:"AQL",owner:"RunDevelopment"},arduino:{title:"Arduino",require:"cpp",alias:"ino",owner:"dkern"},arff:{title:"ARFF",owner:"Golmote"},armasm:{title:"ARM Assembly",alias:"arm-asm",owner:"RunDevelopment"},arturo:{title:"Arturo",alias:"art",optional:["bash","css","javascript","markup","markdown","sql"],owner:"drkameleon"},asciidoc:{alias:"adoc",title:"AsciiDoc",owner:"Golmote"},aspnet:{title:"ASP.NET (C#)",require:["markup","csharp"],owner:"nauzilus"},asm6502:{title:"6502 Assembly",owner:"kzurawel"},asmatmel:{title:"Atmel AVR Assembly",owner:"cerkit"},autohotkey:{title:"AutoHotkey",owner:"aviaryan"},autoit:{title:"AutoIt",owner:"Golmote"},avisynth:{title:"AviSynth",alias:"avs",owner:"Zinfidel"},"avro-idl":{title:"Avro 
IDL",alias:"avdl",owner:"RunDevelopment"},awk:{title:"AWK",alias:"gawk",aliasTitles:{gawk:"GAWK"},owner:"RunDevelopment"},bash:{title:"Bash",alias:["sh","shell"],aliasTitles:{sh:"Shell",shell:"Shell"},owner:"zeitgeist87"},basic:{title:"BASIC",owner:"Golmote"},batch:{title:"Batch",owner:"Golmote"},bbcode:{title:"BBcode",alias:"shortcode",aliasTitles:{shortcode:"Shortcode"},owner:"RunDevelopment"},bbj:{title:"BBj",owner:"hyyan"},bicep:{title:"Bicep",owner:"johnnyreilly"},birb:{title:"Birb",require:"clike",owner:"Calamity210"},bison:{title:"Bison",require:"c",owner:"Golmote"},bnf:{title:"BNF",alias:"rbnf",aliasTitles:{rbnf:"RBNF"},owner:"RunDevelopment"},bqn:{title:"BQN",owner:"yewscion"},brainfuck:{title:"Brainfuck",owner:"Golmote"},brightscript:{title:"BrightScript",owner:"RunDevelopment"},bro:{title:"Bro",owner:"wayward710"},bsl:{title:"BSL (1C:Enterprise)",alias:"oscript",aliasTitles:{oscript:"OneScript"},owner:"Diversus23"},c:{title:"C",require:"clike",owner:"zeitgeist87"},csharp:{title:"C#",require:"clike",alias:["cs","dotnet"],owner:"mvalipour"},cpp:{title:"C++",require:"c",owner:"zeitgeist87"},cfscript:{title:"CFScript",require:"clike",alias:"cfc",owner:"mjclemente"},chaiscript:{title:"ChaiScript",require:["clike","cpp"],owner:"RunDevelopment"},cil:{title:"CIL",owner:"sbrl"},cilkc:{title:"Cilk/C",require:"c",alias:"cilk-c",owner:"OpenCilk"},cilkcpp:{title:"Cilk/C++",require:"cpp",alias:["cilk-cpp","cilk"],owner:"OpenCilk"},clojure:{title:"Clojure",owner:"troglotit"},cmake:{title:"CMake",owner:"mjrogozinski"},cobol:{title:"COBOL",owner:"RunDevelopment"},coffeescript:{title:"CoffeeScript",require:"javascript",alias:"coffee",owner:"R-osey"},concurnas:{title:"Concurnas",alias:"conc",owner:"jasontatton"},csp:{title:"Content-Security-Policy",owner:"ScottHelme"},cooklang:{title:"Cooklang",owner:"ahue"},coq:{title:"Coq",owner:"RunDevelopment"},crystal:{title:"Crystal",require:"ruby",owner:"MakeNowJust"},"css-extras":{title:"CSS 
Extras",require:"css",modify:"css",owner:"milesj"},csv:{title:"CSV",owner:"RunDevelopment"},cue:{title:"CUE",owner:"RunDevelopment"},cypher:{title:"Cypher",owner:"RunDevelopment"},d:{title:"D",require:"clike",owner:"Golmote"},dart:{title:"Dart",require:"clike",owner:"Golmote"},dataweave:{title:"DataWeave",owner:"machaval"},dax:{title:"DAX",owner:"peterbud"},dhall:{title:"Dhall",owner:"RunDevelopment"},diff:{title:"Diff",owner:"uranusjr"},django:{title:"Django/Jinja2",require:"markup-templating",alias:"jinja2",owner:"romanvm"},"dns-zone-file":{title:"DNS zone file",owner:"RunDevelopment",alias:"dns-zone"},docker:{title:"Docker",alias:"dockerfile",owner:"JustinBeckwith"},dot:{title:"DOT (Graphviz)",alias:"gv",optional:"markup",owner:"RunDevelopment"},ebnf:{title:"EBNF",owner:"RunDevelopment"},editorconfig:{title:"EditorConfig",owner:"osipxd"},eiffel:{title:"Eiffel",owner:"Conaclos"},ejs:{title:"EJS",require:["javascript","markup-templating"],owner:"RunDevelopment",alias:"eta",aliasTitles:{eta:"Eta"}},elixir:{title:"Elixir",owner:"Golmote"},elm:{title:"Elm",owner:"zwilias"},etlua:{title:"Embedded Lua templating",require:["lua","markup-templating"],owner:"RunDevelopment"},erb:{title:"ERB",require:["ruby","markup-templating"],owner:"Golmote"},erlang:{title:"Erlang",owner:"Golmote"},"excel-formula":{title:"Excel Formula",alias:["xlsx","xls"],owner:"RunDevelopment"},fsharp:{title:"F#",require:"clike",owner:"simonreynolds7"},factor:{title:"Factor",owner:"catb0t"},false:{title:"False",owner:"edukisto"},"firestore-security-rules":{title:"Firestore security rules",require:"clike",owner:"RunDevelopment"},flow:{title:"Flow",require:"javascript",owner:"Golmote"},fortran:{title:"Fortran",owner:"Golmote"},ftl:{title:"FreeMarker Template Language",require:"markup-templating",owner:"RunDevelopment"},gml:{title:"GameMaker Language",alias:"gamemakerlanguage",require:"clike",owner:"LiarOnce"},gap:{title:"GAP 
(CAS)",owner:"RunDevelopment"},gcode:{title:"G-code",owner:"RunDevelopment"},gdscript:{title:"GDScript",owner:"RunDevelopment"},gedcom:{title:"GEDCOM",owner:"Golmote"},gettext:{title:"gettext",alias:"po",owner:"RunDevelopment"},gherkin:{title:"Gherkin",owner:"hason"},git:{title:"Git",owner:"lgiraudel"},glsl:{title:"GLSL",require:"c",owner:"Golmote"},gn:{title:"GN",alias:"gni",owner:"RunDevelopment"},"linker-script":{title:"GNU Linker Script",alias:"ld",owner:"RunDevelopment"},go:{title:"Go",require:"clike",owner:"arnehormann"},"go-module":{title:"Go module",alias:"go-mod",owner:"RunDevelopment"},gradle:{title:"Gradle",require:"clike",owner:"zeabdelkhalek-badido18"},graphql:{title:"GraphQL",optional:"markdown",owner:"Golmote"},groovy:{title:"Groovy",require:"clike",owner:"robfletcher"},haml:{title:"Haml",require:"ruby",optional:["css","css-extras","coffeescript","erb","javascript","less","markdown","scss","textile"],owner:"Golmote"},handlebars:{title:"Handlebars",require:"markup-templating",alias:["hbs","mustache"],aliasTitles:{mustache:"Mustache"},owner:"Golmote"},haskell:{title:"Haskell",alias:"hs",owner:"bholst"},haxe:{title:"Haxe",require:"clike",optional:"regex",owner:"Golmote"},hcl:{title:"HCL",owner:"outsideris"},hlsl:{title:"HLSL",require:"c",owner:"RunDevelopment"},hoon:{title:"Hoon",owner:"matildepark"},http:{title:"HTTP",optional:["csp","css","hpkp","hsts","javascript","json","markup","uri"],owner:"danielgtaylor"},hpkp:{title:"HTTP Public-Key-Pins",owner:"ScottHelme"},hsts:{title:"HTTP Strict-Transport-Security",owner:"ScottHelme"},ichigojam:{title:"IchigoJam",owner:"BlueCocoa"},icon:{title:"Icon",owner:"Golmote"},"icu-message-format":{title:"ICU Message Format",owner:"RunDevelopment"},idris:{title:"Idris",alias:"idr",owner:"KeenS",require:"haskell"},ignore:{title:".ignore",owner:"osipxd",alias:["gitignore","hgignore","npmignore"],aliasTitles:{gitignore:".gitignore",hgignore:".hgignore",npmignore:".npmignore"}},inform7:{title:"Inform 
7",owner:"Golmote"},ini:{title:"Ini",owner:"aviaryan"},io:{title:"Io",owner:"AlesTsurko"},j:{title:"J",owner:"Golmote"},java:{title:"Java",require:"clike",owner:"sherblot"},javadoc:{title:"JavaDoc",require:["markup","java","javadoclike"],modify:"java",optional:"scala",owner:"RunDevelopment"},javadoclike:{title:"JavaDoc-like",modify:["java","javascript","php"],owner:"RunDevelopment"},javastacktrace:{title:"Java stack trace",owner:"RunDevelopment"},jexl:{title:"Jexl",owner:"czosel"},jolie:{title:"Jolie",require:"clike",owner:"thesave"},jq:{title:"JQ",owner:"RunDevelopment"},jsdoc:{title:"JSDoc",require:["javascript","javadoclike","typescript"],modify:"javascript",optional:["actionscript","coffeescript"],owner:"RunDevelopment"},"js-extras":{title:"JS Extras",require:"javascript",modify:"javascript",optional:["actionscript","coffeescript","flow","n4js","typescript"],owner:"RunDevelopment"},json:{title:"JSON",alias:"webmanifest",aliasTitles:{webmanifest:"Web App Manifest"},owner:"CupOfTea696"},json5:{title:"JSON5",require:"json",owner:"RunDevelopment"},jsonp:{title:"JSONP",require:"json",owner:"RunDevelopment"},jsstacktrace:{title:"JS stack trace",owner:"sbrl"},"js-templates":{title:"JS Templates",require:"javascript",modify:"javascript",optional:["css","css-extras","graphql","markdown","markup","sql"],owner:"RunDevelopment"},julia:{title:"Julia",owner:"cdagnino"},keepalived:{title:"Keepalived Configure",owner:"dev-itsheng"},keyman:{title:"Keyman",owner:"mcdurdin"},kotlin:{title:"Kotlin",alias:["kt","kts"],aliasTitles:{kts:"Kotlin Script"},require:"clike",owner:"Golmote"},kumir:{title:"KuMir 
(\u041a\u0443\u041c\u0438\u0440)",alias:"kum",owner:"edukisto"},kusto:{title:"Kusto",owner:"RunDevelopment"},latex:{title:"LaTeX",alias:["tex","context"],aliasTitles:{tex:"TeX",context:"ConTeXt"},owner:"japborst"},latte:{title:"Latte",require:["clike","markup-templating","php"],owner:"nette"},less:{title:"Less",require:"css",optional:"css-extras",owner:"Golmote"},lilypond:{title:"LilyPond",require:"scheme",alias:"ly",owner:"RunDevelopment"},liquid:{title:"Liquid",require:"markup-templating",owner:"cinhtau"},lisp:{title:"Lisp",alias:["emacs","elisp","emacs-lisp"],owner:"JuanCaicedo"},livescript:{title:"LiveScript",owner:"Golmote"},llvm:{title:"LLVM IR",owner:"porglezomp"},log:{title:"Log file",optional:"javastacktrace",owner:"RunDevelopment"},lolcode:{title:"LOLCODE",owner:"Golmote"},lua:{title:"Lua",owner:"Golmote"},magma:{title:"Magma (CAS)",owner:"RunDevelopment"},makefile:{title:"Makefile",owner:"Golmote"},markdown:{title:"Markdown",require:"markup",optional:"yaml",alias:"md",owner:"Golmote"},"markup-templating":{title:"Markup templating",require:"markup",owner:"Golmote"},mata:{title:"Mata",owner:"RunDevelopment"},matlab:{title:"MATLAB",owner:"Golmote"},maxscript:{title:"MAXScript",owner:"RunDevelopment"},mel:{title:"MEL",owner:"Golmote"},mermaid:{title:"Mermaid",owner:"RunDevelopment"},metafont:{title:"METAFONT",owner:"LaeriExNihilo"},mizar:{title:"Mizar",owner:"Golmote"},mongodb:{title:"MongoDB",owner:"airs0urce",require:"javascript"},monkey:{title:"Monkey",owner:"Golmote"},moonscript:{title:"MoonScript",alias:"moon",owner:"RunDevelopment"},n1ql:{title:"N1QL",owner:"TMWilds"},n4js:{title:"N4JS",require:"javascript",optional:"jsdoc",alias:"n4jsd",owner:"bsmith-n4"},"nand2tetris-hdl":{title:"Nand To Tetris HDL",owner:"stephanmax"},naniscript:{title:"Naninovel 
Script",owner:"Elringus",alias:"nani"},nasm:{title:"NASM",owner:"rbmj"},neon:{title:"NEON",owner:"nette"},nevod:{title:"Nevod",owner:"nezaboodka"},nginx:{title:"nginx",owner:"volado"},nim:{title:"Nim",owner:"Golmote"},nix:{title:"Nix",owner:"Golmote"},nsis:{title:"NSIS",owner:"idleberg"},objectivec:{title:"Objective-C",require:"c",alias:"objc",owner:"uranusjr"},ocaml:{title:"OCaml",owner:"Golmote"},odin:{title:"Odin",owner:"edukisto"},opencl:{title:"OpenCL",require:"c",modify:["c","cpp"],owner:"Milania1"},openqasm:{title:"OpenQasm",alias:"qasm",owner:"RunDevelopment"},oz:{title:"Oz",owner:"Golmote"},parigp:{title:"PARI/GP",owner:"Golmote"},parser:{title:"Parser",require:"markup",owner:"Golmote"},pascal:{title:"Pascal",alias:"objectpascal",aliasTitles:{objectpascal:"Object Pascal"},owner:"Golmote"},pascaligo:{title:"Pascaligo",owner:"DefinitelyNotAGoat"},psl:{title:"PATROL Scripting Language",owner:"bertysentry"},pcaxis:{title:"PC-Axis",alias:"px",owner:"RunDevelopment"},peoplecode:{title:"PeopleCode",alias:"pcode",owner:"RunDevelopment"},perl:{title:"Perl",owner:"Golmote"},php:{title:"PHP",require:"markup-templating",owner:"milesj"},phpdoc:{title:"PHPDoc",require:["php","javadoclike"],modify:"php",owner:"RunDevelopment"},"php-extras":{title:"PHP Extras",require:"php",modify:"php",owner:"milesj"},"plant-uml":{title:"PlantUML",alias:"plantuml",owner:"RunDevelopment"},plsql:{title:"PL/SQL",require:"sql",owner:"Golmote"},powerquery:{title:"PowerQuery",alias:["pq","mscript"],owner:"peterbud"},powershell:{title:"PowerShell",owner:"nauzilus"},processing:{title:"Processing",require:"clike",owner:"Golmote"},prolog:{title:"Prolog",owner:"Golmote"},promql:{title:"PromQL",owner:"arendjr"},properties:{title:".properties",owner:"Golmote"},protobuf:{title:"Protocol 
Buffers",require:"clike",owner:"just-boris"},pug:{title:"Pug",require:["markup","javascript"],optional:["coffeescript","ejs","handlebars","less","livescript","markdown","scss","stylus","twig"],owner:"Golmote"},puppet:{title:"Puppet",owner:"Golmote"},pure:{title:"Pure",optional:["c","cpp","fortran"],owner:"Golmote"},purebasic:{title:"PureBasic",require:"clike",alias:"pbfasm",owner:"HeX0R101"},purescript:{title:"PureScript",require:"haskell",alias:"purs",owner:"sriharshachilakapati"},python:{title:"Python",alias:"py",owner:"multipetros"},qsharp:{title:"Q#",require:"clike",alias:"qs",owner:"fedonman"},q:{title:"Q (kdb+ database)",owner:"Golmote"},qml:{title:"QML",require:"javascript",owner:"RunDevelopment"},qore:{title:"Qore",require:"clike",owner:"temnroegg"},r:{title:"R",owner:"Golmote"},racket:{title:"Racket",require:"scheme",alias:"rkt",owner:"RunDevelopment"},cshtml:{title:"Razor C#",alias:"razor",require:["markup","csharp"],optional:["css","css-extras","javascript","js-extras"],owner:"RunDevelopment"},jsx:{title:"React JSX",require:["markup","javascript"],optional:["jsdoc","js-extras","js-templates"],owner:"vkbansal"},tsx:{title:"React TSX",require:["jsx","typescript"]},reason:{title:"Reason",require:"clike",owner:"Golmote"},regex:{title:"Regex",owner:"RunDevelopment"},rego:{title:"Rego",owner:"JordanSh"},renpy:{title:"Ren'py",alias:"rpy",owner:"HyuchiaDiego"},rescript:{title:"ReScript",alias:"res",owner:"vmarcosp"},rest:{title:"reST (reStructuredText)",owner:"Golmote"},rip:{title:"Rip",owner:"ravinggenius"},roboconf:{title:"Roboconf",owner:"Golmote"},robotframework:{title:"Robot Framework",alias:"robot",owner:"RunDevelopment"},ruby:{title:"Ruby",require:"clike",alias:"rb",owner:"samflores"},rust:{title:"Rust",owner:"Golmote"},sas:{title:"SAS",optional:["groovy","lua","sql"],owner:"Golmote"},sass:{title:"Sass (Sass)",require:"css",optional:"css-extras",owner:"Golmote"},scss:{title:"Sass 
(SCSS)",require:"css",optional:"css-extras",owner:"MoOx"},scala:{title:"Scala",require:"java",owner:"jozic"},scheme:{title:"Scheme",owner:"bacchus123"},"shell-session":{title:"Shell session",require:"bash",alias:["sh-session","shellsession"],owner:"RunDevelopment"},smali:{title:"Smali",owner:"RunDevelopment"},smalltalk:{title:"Smalltalk",owner:"Golmote"},smarty:{title:"Smarty",require:"markup-templating",optional:"php",owner:"Golmote"},sml:{title:"SML",alias:"smlnj",aliasTitles:{smlnj:"SML/NJ"},owner:"RunDevelopment"},solidity:{title:"Solidity (Ethereum)",alias:"sol",require:"clike",owner:"glachaud"},"solution-file":{title:"Solution file",alias:"sln",owner:"RunDevelopment"},soy:{title:"Soy (Closure Template)",require:"markup-templating",owner:"Golmote"},sparql:{title:"SPARQL",require:"turtle",owner:"Triply-Dev",alias:"rq"},"splunk-spl":{title:"Splunk SPL",owner:"RunDevelopment"},sqf:{title:"SQF: Status Quo Function (Arma 3)",require:"clike",owner:"RunDevelopment"},sql:{title:"SQL",owner:"multipetros"},squirrel:{title:"Squirrel",require:"clike",owner:"RunDevelopment"},stan:{title:"Stan",owner:"RunDevelopment"},stata:{title:"Stata Ado",require:["mata","java","python"],owner:"RunDevelopment"},iecst:{title:"Structured Text (IEC 61131-3)",owner:"serhioromano"},stylus:{title:"Stylus",owner:"vkbansal"},supercollider:{title:"SuperCollider",alias:"sclang",owner:"RunDevelopment"},swift:{title:"Swift",owner:"chrischares"},systemd:{title:"Systemd configuration file",owner:"RunDevelopment"},"t4-templating":{title:"T4 templating",owner:"RunDevelopment"},"t4-cs":{title:"T4 Text Templates (C#)",require:["t4-templating","csharp"],alias:"t4",owner:"RunDevelopment"},"t4-vb":{title:"T4 Text Templates (VB)",require:["t4-templating","vbnet"],owner:"RunDevelopment"},tap:{title:"TAP",owner:"isaacs",require:"yaml"},tcl:{title:"Tcl",owner:"PeterChaplin"},tt2:{title:"Template Toolkit 
2",require:["clike","markup-templating"],owner:"gflohr"},textile:{title:"Textile",require:"markup",optional:"css",owner:"Golmote"},toml:{title:"TOML",owner:"RunDevelopment"},tremor:{title:"Tremor",alias:["trickle","troy"],owner:"darach",aliasTitles:{trickle:"trickle",troy:"troy"}},turtle:{title:"Turtle",alias:"trig",aliasTitles:{trig:"TriG"},owner:"jakubklimek"},twig:{title:"Twig",require:"markup-templating",owner:"brandonkelly"},typescript:{title:"TypeScript",require:"javascript",optional:"js-templates",alias:"ts",owner:"vkbansal"},typoscript:{title:"TypoScript",alias:"tsconfig",aliasTitles:{tsconfig:"TSConfig"},owner:"dkern"},unrealscript:{title:"UnrealScript",alias:["uscript","uc"],owner:"RunDevelopment"},uorazor:{title:"UO Razor Script",owner:"jaseowns"},uri:{title:"URI",alias:"url",aliasTitles:{url:"URL"},owner:"RunDevelopment"},v:{title:"V",require:"clike",owner:"taggon"},vala:{title:"Vala",require:"clike",optional:"regex",owner:"TemplarVolk"},vbnet:{title:"VB.Net",require:"basic",owner:"Bigsby"},velocity:{title:"Velocity",require:"markup",owner:"Golmote"},verilog:{title:"Verilog",owner:"a-rey"},vhdl:{title:"VHDL",owner:"a-rey"},vim:{title:"vim",owner:"westonganger"},"visual-basic":{title:"Visual Basic",alias:["vb","vba"],aliasTitles:{vba:"VBA"},owner:"Golmote"},warpscript:{title:"WarpScript",owner:"RunDevelopment"},wasm:{title:"WebAssembly",owner:"Golmote"},"web-idl":{title:"Web IDL",alias:"webidl",owner:"RunDevelopment"},wgsl:{title:"WGSL",owner:"Dr4gonthree"},wiki:{title:"Wiki markup",require:"markup",owner:"Golmote"},wolfram:{title:"Wolfram language",alias:["mathematica","nb","wl"],aliasTitles:{mathematica:"Mathematica",nb:"Mathematica Notebook"},owner:"msollami"},wren:{title:"Wren",owner:"clsource"},xeora:{title:"Xeora",require:"markup",alias:"xeoracube",aliasTitles:{xeoracube:"XeoraCube"},owner:"freakmaxi"},"xml-doc":{title:"XML doc (.net)",require:"markup",modify:["csharp","fsharp","vbnet"],owner:"RunDevelopment"},xojo:{title:"Xojo 
(REALbasic)",owner:"Golmote"},xquery:{title:"XQuery",require:"markup",owner:"Golmote"},yaml:{title:"YAML",alias:"yml",owner:"hason"},yang:{title:"YANG",owner:"RunDevelopment"},zig:{title:"Zig",owner:"RunDevelopment"}},plugins:{meta:{path:"plugins/{id}/prism-{id}",link:"plugins/{id}/"},"line-highlight":{title:"Line Highlight",description:"Highlights specific lines and/or line ranges."},"line-numbers":{title:"Line Numbers",description:"Line number at the beginning of code lines.",owner:"kuba-kubula"},"show-invisibles":{title:"Show Invisibles",description:"Show hidden characters such as tabs and line breaks.",optional:["autolinker","data-uri-highlight"]},autolinker:{title:"Autolinker",description:"Converts URLs and emails in code to clickable links. Parses Markdown links in comments."},wpd:{title:"WebPlatform Docs",description:'Makes tokens link to WebPlatform.org documentation. The links open in a new tab.'},"custom-class":{title:"Custom Class",description:"This plugin allows you to prefix Prism's default classes (.comment can become .namespace--comment) or replace them with your defined ones (like .editor__comment). You can even add new classes.",owner:"dvkndn",noCSS:!0},"file-highlight":{title:"File Highlight",description:"Fetch external files and highlight them with Prism. Used on the Prism website itself.",noCSS:!0},"show-language":{title:"Show Language",description:"Display the highlighted language in code blocks (inline code does not show the label).",owner:"nauzilus",noCSS:!0,require:"toolbar"},"jsonp-highlight":{title:"JSONP Highlight",description:"Fetch content with JSONP and highlight some interesting content (e.g. 
GitHub/Gists or Bitbucket API).",noCSS:!0,owner:"nauzilus"},"highlight-keywords":{title:"Highlight Keywords",description:"Adds special CSS classes for each keyword for fine-grained highlighting.",owner:"vkbansal",noCSS:!0},"remove-initial-line-feed":{title:"Remove initial line feed",description:"Removes the initial line feed in code blocks.",owner:"Golmote",noCSS:!0},"inline-color":{title:"Inline color",description:"Adds a small inline preview for colors in style sheets.",require:"css-extras",owner:"RunDevelopment"},previewers:{title:"Previewers",description:"Previewers for angles, colors, gradients, easing and time.",require:"css-extras",owner:"Golmote"},autoloader:{title:"Autoloader",description:"Automatically loads the needed languages to highlight the code blocks.",owner:"Golmote",noCSS:!0},"keep-markup":{title:"Keep Markup",description:"Prevents custom markup from being dropped out during highlighting.",owner:"Golmote",optional:"normalize-whitespace",noCSS:!0},"command-line":{title:"Command Line",description:"Display a command line with a prompt and, optionally, the output/response from the commands.",owner:"chriswells0"},"unescaped-markup":{title:"Unescaped Markup",description:"Write markup without having to escape anything."},"normalize-whitespace":{title:"Normalize Whitespace",description:"Supports multiple operations to normalize whitespace in code blocks.",owner:"zeitgeist87",optional:"unescaped-markup",noCSS:!0},"data-uri-highlight":{title:"Data-URI Highlight",description:"Highlights data-URI contents.",owner:"Golmote",noCSS:!0},toolbar:{title:"Toolbar",description:"Attach a toolbar for plugins to easily register buttons on the top of a code block.",owner:"mAAdhaTTah"},"copy-to-clipboard":{title:"Copy to Clipboard Button",description:"Add a button that copies the code block to the clipboard when clicked.",owner:"mAAdhaTTah",require:"toolbar",noCSS:!0},"download-button":{title:"Download Button",description:"A button in the toolbar of a code block adding a 
convenient way to download a code file.",owner:"Golmote",require:"toolbar",noCSS:!0},"match-braces":{title:"Match braces",description:"Highlights matching braces.",owner:"RunDevelopment"},"diff-highlight":{title:"Diff Highlight",description:"Highlights the code inside diff blocks.",owner:"RunDevelopment",require:"diff"},"filter-highlight-all":{title:"Filter highlightAll",description:"Filters the elements the highlightAll and highlightAllUnder methods actually highlight.",owner:"RunDevelopment",noCSS:!0},treeview:{title:"Treeview",description:"A language with special styles to highlight file system tree structures.",owner:"Golmote"}}})},2885:(e,t,n)=>{const r=n(9901),o=n(9642),a=new Set;function i(e){void 0===e?e=Object.keys(r.languages).filter((e=>"meta"!=e)):Array.isArray(e)||(e=[e]);const t=[...a,...Object.keys(Prism.languages)];o(r,e,t).load((e=>{if(!(e in r.languages))return void(i.silent||console.warn("Language does not exist: "+e));const t="./prism-"+e;delete n.c[n(6500).resolve(t)],delete Prism.languages[e],n(6500)(t),a.add(e)}))}i.silent=!1,e.exports=i},6726:(e,t,n)=>{var r={"./":2885};function o(e){var t=a(e);return n(t)}function a(e){if(!n.o(r,e)){var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}return r[e]}o.keys=function(){return Object.keys(r)},o.resolve=a,e.exports=o,o.id=6726},6500:(e,t,n)=>{var r={"./":2885};function o(e){var t=a(e);return n(t)}function a(e){if(!n.o(r,e)){var t=new Error("Cannot find module '"+e+"'");throw t.code="MODULE_NOT_FOUND",t}return r[e]}o.keys=function(){return Object.keys(r)},o.resolve=a,e.exports=o,o.id=6500},9642:e=>{"use strict";var t=function(){var e=function(){};function t(e,t){Array.isArray(e)?e.forEach(t):null!=e&&t(e,0)}function n(e){for(var t={},n=0,r=e.length;n "));var l={},s=e[r];if(s){function u(t){if(!(t in e))throw new Error(r+" depends on an unknown component "+t);if(!(t in l))for(var i in 
o(t,a),l[t]=!0,n[t])l[i]=!0}t(s.require,u),t(s.optional,u),t(s.modify,u)}n[r]=l,a.pop()}}return function(e){var t=n[e];return t||(o(e,r),t=n[e]),t}}function o(e){for(var t in e)return!0;return!1}return function(a,i,l){var s=function(e){var t={};for(var n in e){var r=e[n];for(var o in r)if("meta"!=o){var a=r[o];t[o]="string"==typeof a?{title:a}:a}}return t}(a),u=function(e){var n;return function(r){if(r in e)return r;if(!n)for(var o in n={},e){var a=e[o];t(a&&a.alias,(function(t){if(t in n)throw new Error(t+" cannot be alias for both "+o+" and "+n[t]);if(t in e)throw new Error(t+" cannot be alias of "+o+" because it is a component.");n[t]=o}))}return n[r]||r}}(s);i=i.map(u),l=(l||[]).map(u);var c=n(i),d=n(l);i.forEach((function e(n){var r=s[n];t(r&&r.require,(function(t){t in d||(c[t]=!0,e(t))}))}));for(var f,p=r(s),m=c;o(m);){for(var h in f={},m){var g=s[h];t(g&&g.modify,(function(e){e in d&&(f[e]=!0)}))}for(var v in d)if(!(v in c))for(var b in p(v))if(b in c){f[v]=!0;break}for(var y in m=f)c[y]=!0}var w={getIds:function(){var e=[];return w.load((function(t){e.push(t)})),e},load:function(t,n){return function(t,n,r,o){var a=o?o.series:void 0,i=o?o.parallel:e,l={},s={};function u(e){if(e in l)return l[e];s[e]=!0;var o,c=[];for(var d in t(e))d in n&&c.push(d);if(0===c.length)o=r(e);else{var f=i(c.map((function(e){var t=u(e);return delete s[e],t})));a?o=a(f,(function(){return r(e)})):r(e)}return l[e]=o}for(var c in n)u(c);var d=[];for(var f in s)d.push(l[f]);return i(d)}(p,c,t,n)}};return w}}();e.exports=t},2703:(e,t,n)=>{"use strict";var r=n(414);function o(){}function a(){}a.resetWarningCache=o,e.exports=function(){function e(e,t,n,o,a,i){if(i!==r){var l=new Error("Calling PropTypes validators directly is not supported by the `prop-types` package. Use PropTypes.checkPropTypes() to call them. 
Read more at http://fb.me/use-check-prop-types");throw l.name="Invariant Violation",l}}function t(){return e}e.isRequired=e;var n={array:e,bigint:e,bool:e,func:e,number:e,object:e,string:e,symbol:e,any:e,arrayOf:t,element:e,elementType:e,instanceOf:t,node:e,objectOf:t,oneOf:t,oneOfType:t,shape:t,exact:t,checkPropTypes:a,resetWarningCache:o};return n.PropTypes=n,n}},5697:(e,t,n)=>{e.exports=n(2703)()},414:e=>{"use strict";e.exports="SECRET_DO_NOT_PASS_THIS_OR_YOU_WILL_BE_FIRED"},4448:(e,t,n)=>{"use strict";var r=n(7294),o=n(7418),a=n(3840);function i(e){for(var t="https://reactjs.org/docs/error-decoder.html?invariant="+e,n=1;n